# ft_linear_regression/LinearRegression.py
import numpy as np
import matplotlib.pyplot as plt
class LinearRegression:
    """Multivariate linear regression trained with batch gradient descent.

    ``data`` is a 2-D numpy array whose LAST column is the target y and whose
    remaining columns are the features.  Features are min-max scaled
    internally (``self.data``, with a leading bias column of ones);
    ``self.thetas`` lives in that scaled space while ``self.raw_thetas``
    expresses the model in the original units.
    """

    def __init__(self, thetas, data, epochs=1000, learning_rate=0.001):
        self.cost = []  # cost recorded after every training epoch
        self.epochs = epochs
        self.learning_rate = learning_rate
        self.raw_thetas = thetas
        self.raw_data = data
        self.__get_scaled_data()
        try:
            self.__get_scaled_thetas()
        except Exception:  # was a bare except; keep the fallback, narrow the catch
            print('error in raw_thetas format, setting thetas to 0')
            self.thetas = np.zeros(self.raw_data.shape[1])
            self.raw_thetas = np.zeros(self.raw_data.shape[1])

    def gradient_descent(self):
        """Run ``epochs`` descent steps, then de-scale thetas to raw units."""
        for _ in range(self.epochs):
            self.thetas = self.__gradient_descent_epoch()
            self.cost.append(self.get_cost())
        self.raw_thetas = np.empty(len(self.thetas))
        # Undo the min-max scaling on each slope: theta_raw = theta / range(x).
        for i in range(1, self.thetas.shape[0]):
            span = max(self.raw_data[:, i - 1]) - min(self.raw_data[:, i - 1])
            self.raw_thetas[i] = self.thetas[i] / span
        # The fitted hyperplane passes through the point of means, so the raw
        # intercept is mean(y) - sum_i theta_raw_i * mean(x_i).
        self.raw_thetas[0] = np.mean(self.data_y)
        for i in range(1, self.raw_data.shape[1]):
            self.raw_thetas[0] -= self.raw_thetas[i] * np.mean(self.raw_data[:, i - 1])

    def get_cost(self):
        """Return the mean squared error over ALL samples.

        Fixes an off-by-one in the original, which started at row 1 and
        ignored the first sample while still dividing by the full row count.
        """
        errors = self.data @ self.thetas - self.data_y
        return np.mean(errors ** 2)

    def show(self):
        """Plot the data with the fitted line (left) and the cost curve (right).

        NOTE(review): the scatter uses raw columns 0 and 1, so the left plot
        only makes sense for single-feature data — confirm intended use.
        """
        plt.subplot(1, 2, 1)
        plt.plot(self.raw_data[:, 0], self.raw_data[:, 1], 'r.')
        x_max = max(self.raw_data[:, 0])
        # Fitted line through x = 0 and x = x_max, using the de-scaled thetas.
        plt.plot([0, x_max],
                 [self.raw_thetas[0], self.raw_thetas[0] + self.raw_thetas[1] * x_max])
        plt.ylabel('y')
        plt.xlabel('x')
        plt.subplot(1, 2, 2)
        plt.plot(self.cost)
        plt.ylabel('cost')
        plt.xlabel('epochs')
        plt.tight_layout()
        plt.show()

    # Builds self.data (bias column of ones followed by min-max scaled
    # features, so Theta0 * x0 = Theta0) and self.data_y (raw target column).
    def __get_scaled_data(self):
        rows, cols = self.raw_data.shape
        self.data = np.empty(shape=(rows, cols))
        self.data[:, 0] = 1
        self.data_y = self.raw_data[:, cols - 1]  # last raw column is y
        for i in range(cols - 1):
            self.data[:, i + 1] = self.raw_data[:, i]
        for i in range(1, cols):
            col_min = min(self.data[:, i])
            col_max = max(self.data[:, i])
            self.data[:, i] = (self.data[:, i] - col_min) / (col_max - col_min)

    # Converts user-supplied raw thetas into the scaled feature space.
    # NOTE(review): the intercept is taken from the LAST raw theta and the
    # feature minima are ignored, which looks inconsistent with the
    # de-scaling done in gradient_descent() — confirm the expected
    # raw_thetas layout before relying on this path.
    def __get_scaled_thetas(self):
        self.thetas = np.empty(self.raw_data.shape[1])
        self.thetas[0] = self.raw_thetas[len(self.raw_thetas) - 1]
        for i in range(self.raw_data.shape[1] - 1):
            span = max(self.raw_data[:, i]) - min(self.raw_data[:, i])
            self.thetas[i + 1] = self.raw_thetas[i + 1] * span

    # One batch gradient-descent step; returns the updated thetas.
    def __gradient_descent_epoch(self):
        errors = self.data @ self.thetas - self.data_y   # (m,) residuals
        gradient = self.data.T @ errors                  # summed over samples
        return self.thetas - self.learning_rate / float(len(self.data)) * gradient

    # Hypothesis h(x) = theta . x for the scaled sample at index ``row``.
    def __predict(self, row):
        return np.dot(self.thetas, self.data[row])