How to verify that the gradient computed in gradient descent is correct? - Derivatives from the limit definition

When debugging, the gradient can be computed from the limit definition of the derivative (inefficient, but simple) and used as a reference to verify that the gradient obtained from the vectorized matrix formula is correct.
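Concretely, for each component θ_i the debug gradient uses the standard central-difference approximation, where ε is a small step and e_i is the i-th unit vector:

    ∂J/∂θ_i ≈ (J(θ + ε·e_i) − J(θ − ε·e_i)) / (2ε)

This is exactly what dJ_debug below computes, one dimension at a time.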

import numpy as np

def J(theta, X_b, y):  # MSE cost function
    try:
        return np.sum((X_b.dot(theta) - y) ** 2) / len(X_b)
    except Exception:
        return float('inf')  # the cost overflows when theta diverges

def dJ_math(theta, X_b, y):  # gradient from the vectorized matrix formula
    return X_b.T.dot(X_b.dot(theta) - y) * 2. / len(X_b)

def dJ_debug(theta, X_b, y, epsilon=0.01):  # gradient from the central-difference (limit) definition
    res = np.empty(len(theta))
    for i in range(len(theta)):
        theta_1 = theta.copy()
        theta_2 = theta.copy()
        theta_1[i] += epsilon
        theta_2[i] -= epsilon
        res[i] = (J(theta_1, X_b, y) - J(theta_2, X_b, y)) / (2 * epsilon)
    return res
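Before wiring either gradient into gradient descent, the two functions can be compared directly at an arbitrary point. The sketch below uses made-up demo data and names (X_demo, X_b_demo, theta_test) purely for illustration:

# Sanity check: the analytic and numerical gradients should agree closely.
np.random.seed(666)
X_demo = np.random.random(size=(100, 5))
X_b_demo = np.hstack([np.ones((len(X_demo), 1)), X_demo])
y_demo = X_b_demo.dot(np.arange(1, 7, dtype=float)) + np.random.normal(size=100)
theta_test = np.random.random(X_b_demo.shape[1])
print(np.allclose(dJ_math(theta_test, X_b_demo, y_demo),
                  dJ_debug(theta_test, X_b_demo, y_demo),
                  atol=1e-4))  # should print True when dJ_math is correct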

# implemented in playML.LinearRegression (numpy is imported there as np)
def fit_gd_debug(self, dJ, X_train, y_train, n_iters=1e4):
    assert X_train.shape[0] == y_train.shape[0], \
        "The size of X_train must be equal to the size of y_train"

    def J(theta, X_b, y):  # the cost function is needed here for the convergence check
        try:
            return np.sum((X_b.dot(theta) - y) ** 2) / len(X_b)
        except Exception:
            return float('inf')

    def gradient_descent(dJ, X_b, y, initial_theta, eta, n_iters, epsilon=1e-8):
        theta = initial_theta
        i_iter = 0

        while i_iter < n_iters:
            last_theta = theta
            gradient = dJ(theta, X_b, y)
            theta = theta - eta * gradient
            # stop once the cost no longer decreases noticeably
            if np.abs(J(theta, X_b, y) - J(last_theta, X_b, y)) < epsilon:
                break
            i_iter += 1
        return theta

    X_b = np.hstack((np.ones((len(X_train), 1)), X_train))
    initial_theta = np.zeros(X_b.shape[1])
    eta = 0.01
    self._theta = gradient_descent(dJ, X_b, y_train, initial_theta, eta, n_iters)
    self.coef_ = self._theta[1:]
    self.intercept_ = self._theta[0]
    return self

Test code:

import numpy as np
import matplotlib.pyplot as plt
np.random.seed(666)
X = np.random.random(size=(1000, 10))
true_theta = np.arange(1, 12, dtype=float)  # 11 values: intercept plus 10 coefficients
X_b = np.hstack([np.ones((len(X), 1)), X])
y = X_b.dot(true_theta) + np.random.normal(size=1000)  # linear data with Gaussian noise
import sys
sys.path.append(r'C:\\N-20KEPC0Y7KFA-Data\\junhuawa\\Documents\\00-Play-with-ML-in-Python\\Jupyter')
import playML
from playML.LinearRegression import LinearRegression

lin_reg1 = LinearRegression()
lin_reg1.fit_gd_debug(dJ_math, X, y)   # fit with the vectorized gradient
lin_reg1._theta

lin_reg2 = LinearRegression()
lin_reg2.fit_gd_debug(dJ_debug, X, y)  # fit with the numerical (debug) gradient
lin_reg2._theta
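
The two fits should land on essentially the same parameters; a minimal follow-up check (the tolerance is chosen loosely and is only illustrative):

# Both runs use the same data, starting point and learning rate, so their
# results should differ only by the tiny error of the numerical gradient,
# and both should sit near true_theta up to the noise added to y.
print(np.allclose(lin_reg1._theta, lin_reg2._theta, atol=1e-4))
print(np.max(np.abs(lin_reg1._theta - true_theta)))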