import  numpy as np
# Synthetic data set: 100 samples with x drawn uniformly from [0, 2)
# and y = 3*x + 4 plus standard-normal noise.
x = 2 * np.random.random(size=100)
y = 3.0 * x + 4.0 + np.random.normal(size=100)
# Reshape to a column vector so x can serve as a single-feature design matrix.
x = x.reshape(-1, 1)


def dJ(theta, X_b, y):
    """Gradient of the squared-error cost J with respect to theta.

    Parameters
    ----------
    theta : 1-D array of coefficients; theta[0] is the intercept term.
    X_b : 2-D design matrix whose first column is assumed to be all ones
        (the intercept component is computed as a plain sum of residuals).
    y : 1-D target vector with len(y) == X_b.shape[0].

    Returns
    -------
    1-D array of the same length as theta: (2/m) times the gradient of
    the sum of squared residuals, where m = len(X_b).
    """
    # Hoist the residual vector out of the loop: the original recomputed
    # X_b.dot(theta) - y once per feature, an O(m*p) product each time.
    residual = X_b.dot(theta) - y
    res = np.empty(len(theta))
    # Intercept component: plain sum of residuals (column 0 assumed all ones).
    res[0] = np.sum(residual)
    # Remaining components in one vectorized matrix-vector product instead
    # of a Python-level loop over columns.
    res[1:] = X_b[:, 1:].T.dot(residual)
    return res * 2 / len(X_b)

def J(theta, X_b, y):
    """Mean squared error of the predictions X_b @ theta against targets y."""
    diff = X_b.dot(theta) - y
    return diff.dot(diff) / len(X_b)

def gradient(X_b, y, initial, eta=0.01, iters=1e4, epsilon=1e-8):
    """Fit theta by batch gradient descent on the cost function J.

    Parameters
    ----------
    X_b : 2-D design matrix (first column assumed to be all ones).
    y : 1-D target vector.
    initial : starting value of theta.
    eta : learning rate (step size).
    iters : maximum number of iterations (cast to int for the loop bound).
    epsilon : convergence tolerance — stop once the cost improves by
        less than this amount between consecutive steps.

    Returns
    -------
    The fitted theta vector.
    """
    theta = initial
    # Cache the current cost so each iteration evaluates J only once;
    # the original recomputed J(last_theta) on every step.
    cost = J(theta, X_b, y)
    for _ in range(int(iters)):
        theta = theta - eta * dJ(theta, X_b, y)
        new_cost = J(theta, X_b, y)
        if abs(new_cost - cost) < epsilon:
            break
        cost = new_cost
    return theta



# Build the design matrix by prepending a bias column of ones, then fit
# from a zero start and report the learned coefficients.
X_b = np.column_stack([np.ones(len(x)), x])

initial_theta = np.zeros(X_b.shape[1])
theta = gradient(X_b, y, initial_theta)
print(theta)