import numpy as np


class LinearRegression:
    """Multivariate linear regression trained with batch gradient descent.

    Features are min-max normalized internally (``self.data``); after
    training, the learned coefficients are converted back to the original
    feature space and exposed as ``self.raw_thetas``.

    Parameters
    ----------
    thetas : sequence of float
        Initial coefficients in raw (unscaled) space; reset to zeros if
        they cannot be converted to scaled space.
    data : np.ndarray
        2-D array whose LAST column is the target y and whose preceding
        columns are the features.
    epochs : int
        Number of gradient-descent passes over the data.
    learning_rate : float
        Step size for each gradient-descent update.
    """

    def __init__(self, thetas, data, epochs=1000, learning_rate=0.001):
        self.cost = []          # MSE recorded after each epoch
        self.epochs = epochs
        self.learning_rate = learning_rate
        self.raw_thetas = thetas
        self.raw_data = data
        self.__get_scaled_data()
        try:
            self.__get_scaled_thetas()
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # are no longer swallowed; these are the errors a malformed
        # `thetas` argument can actually raise during conversion.
        except (TypeError, IndexError, ValueError):
            print('error in raw_thetas format, setting thetas to 0')
            self.thetas = np.zeros(self.raw_data.shape[1])
            self.raw_thetas = np.zeros(self.raw_data.shape[1])

    def gradient_descent(self):
        """Fit ``self.thetas`` by batch gradient descent, recording the cost
        per epoch, then de-normalize the result into ``self.raw_thetas``."""
        for _ in range(self.epochs):
            self.thetas = self.__gradient_descent_epoch()
            self.cost.append(self.get_cost())
        # Undo the min-max scaling: raw slope_i = scaled slope_i / range_i.
        self.raw_thetas = np.empty(len(self.thetas))
        for i in range(1, self.thetas.shape[0]):
            col = self.raw_data[:, i - 1]
            self.raw_thetas[i] = self.thetas[i] / (max(col) - min(col))
        # Intercept from the identity t0 = mean(y) - sum_i t_i * mean(x_i)
        # (exact once gradient descent has converged).
        self.raw_thetas[0] = np.mean(self.raw_data[:, self.raw_data.shape[1] - 1])
        for i in range(1, self.raw_data.shape[1]):
            self.raw_thetas[0] -= self.raw_thetas[i] * np.mean(self.raw_data[:, i - 1])

    def get_cost(self):
        """Return the mean squared error over ALL rows.

        Bug fix: the original loop started at row 1, silently excluding
        the first sample from the cost.
        """
        cost = 0.0
        for i in range(self.data.shape[0]):
            cost += (self.__predict(i) - self.data_y[i]) ** 2
        return cost / float(self.data.shape[0])

    def show(self):
        """Plot the fitted line over the raw data (single-feature case)
        and the cost history, side by side."""
        # Imported lazily so training works without a plotting backend.
        import matplotlib.pyplot as plt

        plt.subplot(1, 2, 1)
        plt.plot(self.raw_data[:, 0], self.raw_data[:, 1], 'r.')
        x_max = max(self.raw_data[:, 0])
        plt.plot([0, x_max],
                 [self.raw_thetas[0], self.raw_thetas[0] + self.raw_thetas[1] * x_max])
        plt.ylabel('y')
        plt.xlabel('x')
        plt.subplot(1, 2, 2)
        plt.plot(self.cost)
        plt.ylabel('cost')
        plt.xlabel('epochs')
        plt.tight_layout()
        plt.show()

    # Adds a bias column filled with 1 (so theta0 * x0 = theta0) and applies
    # min-max normalization to each feature column of the raw data.
    def __get_scaled_data(self):
        """Build ``self.data`` (bias column + scaled features) and
        ``self.data_y`` (the raw target column)."""
        self.data = np.empty(shape=(self.raw_data.shape[0], self.raw_data.shape[1]))
        self.data[:, 0] = 1
        # (original pre-allocated data_y with np.empty and immediately
        # overwrote it; the dead allocation is removed)
        self.data_y = self.raw_data[:, self.raw_data.shape[1] - 1]
        for i in range(self.data.shape[1] - 1):
            self.data[:, i + 1] = self.raw_data[:, i]
        for i in range(1, self.data.shape[1]):
            col_min = min(self.data[:, i])
            col_max = max(self.data[:, i])
            self.data[:, i] = (self.data[:, i] - col_min) / (col_max - col_min)

    def __get_scaled_thetas(self):
        """Convert ``self.raw_thetas`` into the min-max scaled feature space.

        NOTE(review): the intercept is read from the LAST entry of
        ``raw_thetas`` — presumably the stored-thetas format puts it there;
        confirm against the producer of the ``thetas`` argument.
        """
        self.thetas = np.empty(self.raw_data.shape[1])
        self.thetas[0] = self.raw_thetas[len(self.raw_thetas) - 1]
        for i in range(self.raw_data.shape[1] - 1):
            col = self.raw_data[:, i]
            self.thetas[i + 1] = self.raw_thetas[i + 1] * (max(col) - min(col))

    def __gradient_descent_epoch(self):
        """Return the thetas after one batch gradient-descent step."""
        gradient = np.zeros(self.data.shape[1])
        m = len(self.data)
        for i in range(m):
            delta = self.__predict(i) - self.data_y[i]
            gradient += delta * self.data[i]
        return self.thetas - self.learning_rate / float(m) * gradient

    def __predict(self, row):
        """Hypothesis h(x) = thetas · x for one (scaled) data row."""
        return float(np.dot(self.thetas, self.data[row]))