ft_linear_regression
This commit is contained in:
parent
d5134e3903
commit
6898b2c39b
|
@ -0,0 +1,83 @@
|
|||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
class LinearRegression:
    """Linear regression trained by batch gradient descent on min-max scaled features.

    ``data`` is a 2-D array: every column but the last is a feature and the
    last column is the target. Training happens on a scaled design matrix
    (``self.data``) whose first column is all ones; the resulting parameters
    are converted back to the original feature units in ``self.raw_thetas``.

    Attributes:
        cost: mean squared error recorded after each epoch.
        epochs: number of gradient-descent iterations to run.
        learning_rate: step size applied to the averaged gradient.
        raw_data: the array passed as ``data``, untouched.
        raw_thetas: parameters in raw units (intercept first, then slopes).
        data: scaled design matrix (column of ones + min-max scaled features).
        data_y: target column (a view on the last column of ``raw_data``).
        thetas: parameters in scaled space (intercept first, then slopes).
    """

    def __init__(self, thetas, data, epochs=1000, learning_rate=0.001):
        """Store hyper-parameters, scale the data and the initial thetas.

        Args:
            thetas: initial raw parameters. Either ``m`` values (raw intercept
                followed by raw slopes) or ``m + 1`` values where the extra
                trailing value is the intercept in scaled space, ``m`` being
                the number of columns of ``data``.
            data: 2-D array, features first and target in the last column.
            epochs: number of training iterations.
            learning_rate: gradient-descent step size.

        If ``thetas`` cannot be interpreted, a message is printed and both
        ``thetas`` and ``raw_thetas`` are reset to zeros.
        """
        self.cost = []
        self.epochs = epochs
        self.learning_rate = learning_rate
        self.raw_thetas = thetas
        self.raw_data = data
        self.__get_scaled_data()
        try:
            self.__get_scaled_thetas()
        except (IndexError, TypeError, ValueError):
            print('error in raw_thetas format, setting thetas to 0')
            self.thetas = np.zeros(self.raw_data.shape[1])
            self.raw_thetas = np.zeros(self.raw_data.shape[1])

    def gradient_descent(self):
        """Run ``epochs`` iterations, then refresh ``raw_thetas``.

        The cost after every epoch is appended to ``self.cost``.
        """
        for _ in range(self.epochs):
            self.thetas = self.__gradient_descent_epoch()
            self.cost.append(self.get_cost())

        # Exact inverse of the min-max scaling:
        #   t0 + sum(t_i * (x_i - min_i) / range_i)
        #     == (t0 - sum(raw_i * min_i)) + sum(raw_i * x_i),  raw_i = t_i / range_i
        # This holds for any thetas, not only once training has converged.
        slopes = self.thetas[1:] / self._ranges
        self.raw_thetas = np.empty(len(self.thetas))
        self.raw_thetas[1:] = slopes
        self.raw_thetas[0] = self.thetas[0] - np.dot(slopes, self._mins)

    def get_cost(self):
        """Return the mean squared error of the current thetas over all rows."""
        residuals = self.data.dot(self.thetas) - self.data_y
        return float(np.mean(residuals ** 2))

    def show(self):
        """Plot the data with the fitted line, and the cost history.

        The left panel uses column 0 of ``raw_data`` as x and column 1 as y,
        so it is only meaningful for single-feature data.
        """
        x_max = max(self.raw_data[:, 0])
        intercept = self.raw_thetas[0]
        slope = self.raw_thetas[1]

        plt.subplot(1, 2, 1)
        plt.plot(self.raw_data[:, 0], self.raw_data[:, 1], 'r.')
        print(x_max)
        plt.plot([0, x_max], [intercept, intercept + slope * x_max])
        plt.ylabel('y')
        plt.xlabel('x')

        plt.subplot(1, 2, 2)
        plt.plot(self.cost)
        plt.ylabel('cost')
        plt.xlabel('epochs')

        plt.tight_layout()
        plt.show()

    def __get_scaled_data(self):
        """Build the scaled design matrix and extract the target column.

        Adds a leading column of ones (so theta0 * x0 == theta0) and applies
        min-max normalisation to every feature column. The per-feature minimum
        and range are kept so that thetas can be converted between spaces.
        """
        n_rows, n_cols = self.raw_data.shape
        features = self.raw_data[:, :n_cols - 1]
        self.data_y = self.raw_data[:, n_cols - 1]

        self._mins = features.min(axis=0).astype(float)
        ranges = features.max(axis=0).astype(float) - self._mins
        # A constant column has range 0; dividing by 1 instead maps it to all
        # zeros rather than filling the design matrix with NaN.
        self._ranges = np.where(ranges == 0, 1.0, ranges)

        self.data = np.empty(shape=(n_rows, n_cols))
        self.data[:, 0] = 1
        self.data[:, 1:] = (features - self._mins) / self._ranges

    def __get_scaled_thetas(self):
        """Convert ``raw_thetas`` into scaled-space ``thetas``.

        Raises:
            ValueError: if ``raw_thetas`` is not a 1-D sequence of ``m`` or
                ``m + 1`` numbers (``m`` = number of columns of ``raw_data``).
        """
        n_params = self.raw_data.shape[1]
        raw = np.asarray(self.raw_thetas, dtype=float)
        if raw.ndim != 1 or raw.size not in (n_params, n_params + 1):
            raise ValueError('raw_thetas must hold %d or %d values'
                             % (n_params, n_params + 1))

        slopes = raw[1:n_params]
        self.thetas = np.empty(n_params)
        self.thetas[1:] = slopes * self._ranges
        if raw.size == n_params + 1:
            # Trailing value is the scaled intercept saved alongside the raw thetas.
            self.thetas[0] = raw[-1]
        else:
            # Only raw parameters given: derive the scaled intercept exactly.
            self.thetas[0] = raw[0] + np.dot(slopes, self._mins)

    def __gradient_descent_epoch(self):
        """Return the thetas obtained after one batch gradient-descent step."""
        residuals = self.data.dot(self.thetas) - self.data_y
        gradient = self.data.T.dot(residuals) / float(len(self.data))
        return self.thetas - self.learning_rate * gradient

    def __predict(self, row):
        """Return the model output for row index ``row`` of the scaled data."""
        return np.dot(self.thetas, self.data[row])
|
|
@ -0,0 +1,25 @@
|
|||
km,price
|
||||
240000,3650
|
||||
139800,3800
|
||||
150500,4400
|
||||
185530,4450
|
||||
176000,5250
|
||||
114800,5350
|
||||
166800,5800
|
||||
89000,5990
|
||||
144500,5999
|
||||
84000,6200
|
||||
82029,6390
|
||||
63060,6390
|
||||
74000,6600
|
||||
97500,6800
|
||||
67000,6800
|
||||
76025,6900
|
||||
48235,6900
|
||||
93000,6990
|
||||
60949,7490
|
||||
65674,7555
|
||||
54000,7990
|
||||
68500,7990
|
||||
22899,7990
|
||||
61789,8290
|
|
|
@ -0,0 +1,25 @@
|
|||
km,price
|
||||
240000,8650
|
||||
139800,8800
|
||||
150500,9400
|
||||
185530,9450
|
||||
176000,10250
|
||||
114800,10350
|
||||
166800,10800
|
||||
89000,10990
|
||||
144500,10999
|
||||
84000,11200
|
||||
82029,11390
|
||||
63060,11390
|
||||
74000,11600
|
||||
97500,11800
|
||||
67000,11800
|
||||
76025,11900
|
||||
48235,11900
|
||||
93000,11990
|
||||
60949,12490
|
||||
65674,12555
|
||||
54000,12990
|
||||
68500,12990
|
||||
22899,12990
|
||||
61789,13290
|
|
|
@ -0,0 +1,25 @@
|
|||
age,km,price
|
||||
10,240000,8050
|
||||
15,139800,8200
|
||||
18,150500,7400
|
||||
2,185530,9250
|
||||
6,176000,9950
|
||||
8,114800,9950
|
||||
10,166800,9800
|
||||
25,89000,7990
|
||||
4,144500,10499
|
||||
1,84000,11000
|
||||
0,82029,12000
|
||||
2,63060,11300
|
||||
7,74000,11000
|
||||
4,97500,11000
|
||||
9,67000,10000
|
||||
4,76025,11200
|
||||
4,48235,11300
|
||||
7,93000,10500
|
||||
1,60949,12300
|
||||
6,65674,11800
|
||||
5,54000,12500
|
||||
4,68500,12600
|
||||
2,22899,12800
|
||||
7,61789,12590
|
|
|
@ -0,0 +1,86 @@
|
|||
import argparse
|
||||
import numpy as np
|
||||
from train import train
|
||||
from LinearRegression import LinearRegression
|
||||
from matplotlib import animation
|
||||
|
||||
|
||||
def get_thetas(path):
    """Load the first two values of a comma-separated thetas file.

    Args:
        path: location of the thetas file.

    Returns:
        ``[thetas[0], thetas[1]]`` on success, or the string ``"error"`` when
        the file cannot be read or holds fewer than two values (callers use
        that sentinel to offer training).
    """
    try:
        thetas = np.genfromtxt(path, delimiter=',')
    except (OSError, ValueError):
        # OSError: missing/unreadable file; ValueError: ragged rows.
        print('info: path not found, asking for train')
        return "error"
    try:
        return [thetas[0], thetas[1]]
    except (IndexError, TypeError):
        # Fewer than two values (including a single value, which is 0-d).
        print('warning: path in wrong format, asking for train')
        return "error"
|
||||
|
||||
def get_price(mileage, thetas):
    """Return the linear estimate ``thetas[0] + mileage * thetas[1]``."""
    intercept = thetas[0]
    slope = thetas[1]
    return intercept + mileage * slope
|
||||
|
||||
def predict_subject():
    """Interactively predict a car price from a mileage using ``thetas.csv``.

    If the thetas file cannot be used, the user is offered to train the model
    on ``data.csv`` first; declining (or any failure while training) falls
    back to thetas of zero. The mileage is then read from standard input and
    the predicted price is printed.
    """
    thetas = get_thetas('thetas.csv')
    if thetas == "error":
        print("")
        try:
            input_thetas = input("model isn't trained, would you like to train it before predicting car price ? y/n\n")
            if input_thetas == 'y':
                raw_data = np.genfromtxt('data.csv', delimiter=',', skip_header=1)
                train(raw_data, np.zeros(2), 'thetas.csv', False)
                thetas = np.genfromtxt('thetas.csv', delimiter=',')
            else:
                print('info: wrong input format, setting thetas to 0')
                thetas = [0, 0]
        except Exception:
            # Deliberate best-effort: whatever goes wrong while training,
            # keep going with a zero model. Unlike a bare ``except`` this
            # lets Ctrl-C / SystemExit through.
            print('info: wrong input format or fail to train, setting thetas to 0')
            thetas = [0, 0]

    try:
        # NOTE: this used to be int(eval(input(...))), which executes
        # arbitrary Python typed by the user. Parsing as a number keeps
        # integer, decimal and scientific notation but no longer evaluates
        # expressions.
        mileage = int(float(input("Enter mileage\n")))
    except (ValueError, OverflowError, EOFError):
        print('info: input a number')
        return

    if mileage < 0:
        print('info: mileage should be superior to 0 ! aborting')
        return

    price = get_price(mileage, thetas)
    if price < 0:
        print('This car belongs in a museum ! (price inferior to 0)')
    else:
        print('Predicted car value is ', price,)
|
||||
|
||||
def get_y(x, thetas):
    """Return the sum of ``x[i] * thetas[i]`` over every index of ``x``.

    ``thetas`` may be longer than ``x``; extra entries are ignored.
    """
    return sum(value * thetas[idx] for idx, value in enumerate(x))
|
||||
|
||||
def predict(thetas_path):
    """Interactively compute a prediction from an arbitrary thetas file.

    The file is expected to hold at least two comma-separated values; the
    last value is not used as a coefficient, so ``len(thetas) - 1`` inputs
    are combined: a constant 1 followed by the features typed by the user.

    Args:
        thetas_path: location of the thetas file.

    Prints a message and returns ``None`` when the file is missing or
    malformed, or when a feature is not a number.
    """
    try:
        thetas = np.genfromtxt(thetas_path, delimiter=',')
    except (OSError, ValueError):
        print('wrong name or format')
        return
    # A single value loads as a 0-d array and an empty file as an empty one;
    # neither can describe a model.
    if thetas.ndim != 1 or thetas.size < 2:
        print('wrong name or format')
        return

    n_inputs = len(thetas) - 1
    x = np.empty(n_inputs)
    x[0] = 1
    for i in range(1, n_inputs):
        try:
            # NOTE: this used to be int(eval(input(...))), which executes
            # arbitrary Python typed by the user; numbers are now parsed
            # directly and expressions are rejected.
            x[i] = int(float(input("Enter feature " + str(i) + "\n")))
        except (ValueError, OverflowError, EOFError):
            print('input a number')
            return
    print("y is equal to ", get_y(x, thetas))
|
||||
|
||||
def main():
    """Parse the command line and dispatch to the right prediction mode.

    Without ``-p/--path`` the interactive mileage flow is used; with it, the
    generic prediction based on the given thetas file is run.
    """
    cli = argparse.ArgumentParser(
        description='DSLR is a 2 day project, if you do Linear Regression in two weeks :pepethefrog:'
    )
    cli.add_argument("-p", "--path", type=str, default=False, help="thetas file path")
    options = cli.parse_args()

    # The default is the False sentinel; any user-supplied value is a str.
    if options.path is not False:
        predict(options.path)
        return
    predict_subject()
|
||||
|
||||
# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()
|
|
@ -0,0 +1,55 @@
|
|||
import argparse
|
||||
import numpy as np
|
||||
from LinearRegression import LinearRegression
|
||||
from matplotlib import animation
|
||||
|
||||
def get_thetas():
    """Load ``[thetas[2], thetas[1]]`` from ``thetas.csv`` in the working directory.

    Returns:
        A two-element list built from the third and second values of the
        file, or ``[0, 0]`` (after printing a message) when the file cannot
        be read or holds fewer than three values.
    """
    try:
        thetas = np.genfromtxt('thetas.csv', delimiter=',')
    except (OSError, ValueError):
        # OSError: missing/unreadable file; ValueError: ragged rows.
        print("info: thetas.csv not found, setting thetas to 0")
        return [0, 0]
    try:
        return [thetas[2], thetas[1]]
    except (IndexError, TypeError):
        print("warning: thetas.csv in wrong format, setting thetas to 0")
        return [0, 0]
|
||||
|
||||
def train(raw_data, raw_thetas, thetas_path, visu):
    """Fit a LinearRegression model and save its parameters.

    Args:
        raw_data: training array handed to ``LinearRegression`` as ``data``.
        raw_thetas: initial parameters handed to ``LinearRegression``.
        thetas_path: file that receives the trained raw thetas followed by
            the model's first scaled theta, one value per line.
        visu: when true and the model has exactly two thetas, show the plots.
    """
    model = LinearRegression(thetas=raw_thetas, data=raw_data, epochs=10000, learning_rate=0.1)

    print(model.raw_data)
    print("thetas before train")
    initial = model.raw_thetas
    print(initial[:len(initial) - 1])

    model.gradient_descent()

    print("thetas after train")
    print(model.raw_thetas)

    # Raw thetas first, then the scaled intercept as a trailing extra value.
    tosave = [value for value in model.raw_thetas]
    tosave.append(model.thetas[0])
    np.savetxt(thetas_path, tosave, delimiter=',')

    if not visu or len(model.thetas) != 2:
        return
    model.show()
|
||||
|
||||
def main():
    """Parse the command line, load the data and initial thetas, then train.

    Options:
        -p/--path: data file (default ``data.csv``); its first line is
            skipped as a header.
        -t/--thetas: thetas file, used both as the starting point and as the
            output file (default ``thetas.csv``).
        --visu: plot the data and cost after training.

    Prints a message and returns without training when the data file is
    missing, unreadable, or does not load as a 2-D table with at least two
    columns.
    """
    parser = argparse.ArgumentParser(description='DSLR is a 2 day project, if you do Linear Regression in two weeks :pepethefrog:')
    parser.add_argument("-p", "--path", type=str, default='data.csv', help="data file path")
    parser.add_argument("-t", "--thetas", type=str, default=False, help="thetas file path")
    parser.add_argument("--visu", default=False, help="plot data on graph", action="store_true")
    args = parser.parse_args()

    if args.thetas is False:
        args.thetas = 'thetas.csv'

    try:
        raw_data = np.genfromtxt(args.path, delimiter=',', skip_header=1)
    except (OSError, ValueError):
        print('csv file not found or wrong')
        return
    # A single column or a single data row loads as a 1-D array, which has
    # no shape[1] and cannot be split into features and target.
    if raw_data.ndim != 2 or raw_data.shape[1] < 2:
        print('csv file not found or wrong')
        return

    try:
        raw_thetas = np.genfromtxt(args.thetas, delimiter=',')
    except (OSError, ValueError):
        print('thetas file not found or wrong, setting thetas to 0')
        raw_thetas = np.zeros(raw_data.shape[1])

    train(raw_data, raw_thetas, args.thetas, args.visu)
|
||||
|
||||
# Run the training command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()
|
Loading…
Reference in New Issue