function gd(x,y, depth, max_it, tol):
	# Fit weights w by batch gradient descent on the linear prediction x * w.
	#
	# Parameters:
	#   x      - input data; x.num_feats and x.num_samples are read from it
	#   y      - targets that the predictions x * w are compared against
	#   depth  - not used anywhere in this routine
	#            (NOTE(review): confirm whether it can be dropped or was meant
	#            to carry the step size)
	#   max_it - upper bound on the number of iterations
	#   tol    - tolerance: stop once the loss changes by less than this
	#            between two consecutive iterations
	#
	# Returns w: the current weights when the tolerance check passes, or the
	# weights after the last update when max_it iterations run out.
	#
	# NOTE(review): alpha (presumably the learning rate) and x_transpose
	# (presumably the transpose of x) are not defined in this routine; they
	# have to come from the surrounding scope - confirm.
	w = initialize_weights(x.num_feats)
	N = x.num_samples  # loop-invariant, so read it once
	old_loss = infinity  # the first check cannot report convergence for a finite loss
	for iterations(1, max_it):
		y_hat = x * w
		new_loss = calc_loss(y_hat, y)
		# Check convergence before the gradient work: when the loop stops here
		# the update would be thrown away anyway, so w is returned unchanged.
		if (abs(old_loss - new_loss) < tol):
			break
		old_loss = new_loss
		error = y_hat - y
		gradient = (x_transpose * error) / N
		w = w - (alpha * gradient)
	return w