# Gradient descent on the weights w of the linear prediction y_hat = x * w.
#
# Each iteration evaluates the loss at the current weights, stops early if the
# loss changed by less than tol since the previous iteration, and otherwise
# moves w against the averaged gradient (x_transpose * error) / N.
#
# Parameters:
#   x      - feature matrix; must expose num_feats and num_samples
#   y      - targets compared against the predictions x * w
#   depth  - not read anywhere in this routine
#            NOTE(review): confirm whether callers still need to pass it
#   max_it - upper bound on the number of iterations
#   tol    - tolerance: stop once |old_loss - new_loss| < tol
#
# Returns:
#   w - the weights at which the loop stopped (either the first weights whose
#       loss changed by less than tol, or the weights after max_it updates)
#
# NOTE(review): alpha (the step size) is neither a parameter nor defined in
# this routine; it is presumably supplied by the enclosing scope - confirm.
# NOTE(review): the gradient formula matches a squared-error loss averaged
# over N samples; calc_loss is assumed to be consistent with that - confirm.
function gd(x,y, depth, max_it, tol): # tolerance
    w = initialize_weights(x.num_feats)
    old_loss = infinity
    # The sample count does not change between iterations, so read it once.
    N = x.num_samples
    for iterations(1, max_it):
        y_hat = x * w
        new_loss = calc_loss(y_hat, y)
        # Test convergence before computing the update: when the loop stops
        # here the next step would be discarded anyway, so skip the work.
        # old_loss starts at infinity, so the first pass never stops here.
        if (abs(old_loss - new_loss) < tol):
            break
        error = y_hat - y
        gradient = (x_transpose * error) / N
        w = w - (alpha * gradient)
        old_loss = new_loss
    return w