Что не так с моей реализацией градиентного спуска (классификатор SVM с потерей шарнира)

Что не так с моей реализацией градиентного спуска (классификатор SVM с потерей шарнира) ⇐ Python

1 сообщение • Страница 1 из 1

Anonymous

Что не так с моей реализацией градиентного спуска (классификатор SVM с потерей шарнира)

Цитата

Сообщение Anonymous » 05 окт 2024, 00:22

Я пытаюсь реализовать и обучить многоклассовый классификатор SVM с нуля, используя Python и numpy в блокнотах Jupyter.
Я использую курс CS231n в качестве базы знаний, особенно эта страница: https://cs231n.github.io/optimization-1/, где обсуждается градиентный спуск. Я реализовал класс SVM, который, по моему мнению, находится на правильном пути.
Вот базовый профиль этого класса:

Код: Выделить всё

class SVM:
  def __init__(self):
    self.weights = np.random.randn(len(labels), X_train.shape[1]) * 0.1
    self.history = []

  def predict(self, X):
    '''
    returns class predictions in np array of size
    n x num_classes, where n is the number of examples in X
    '''

    #matrix multiplication to apply weights to X
    bounds = self.weights @ X.T

    #return the predictions
    return np.array(bounds).T

  def loss(self, scores, y, delta=1):
'''computes the loss'''
    #calculate and return the loss for a prediction and corresponding truth label
    #hinge loss in this case
    total_loss = 0

    #compute loss for each example...
    for i in range(len(scores)):
      #extract values for this example
      scores_of_x = scores[i]
      label = y[i]
      correct_score = scores_of_x[label]
      incorrect_scores = np.concatenate((scores_of_x[:label], scores_of_x[label+1:]))

      #use the scores for example x to compute the loss at x
      wj_xi = correct_score           #these should be a vector of INCORRECT scores
      wyi_xi = incorrect_scores       #this should be a vector of the CORRECT score
      wy_xi = wj_xi - wyi_xi + delta  #core of the hinge loss formula
      losses = np.maximum(0, wy_xi)   #lower bound the losses at 0
      loss = np.sum(losses)           #sum the losses

      #add to the total loss
      total_loss += loss

    #return the loss
    avg_loss = total_loss / len(scores)
    return avg_loss

  def gradient(self, scores, X, y, delta=1):
'''computes the gradient'''
    #calculate the loss and the gradient of the loss function
    #gradient of hinge loss function
    gradient = np.zeros(self.weights.shape)

    #calculate the gradient in each example in x
    for i in range(len(X)):
      #extract values for this example
      scores_of_x = scores[i]
      label = y[i]
      x = X[i]
      correct_score = scores_of_x[label]
      incorrect_scores = np.concatenate((scores_of_x[:label], scores_of_x[label+1:]))

      #
      ##
      ### start by computing the gradient of the weights of the correct classifier
      ##
      #
      wj_xi = correct_score           #these should be a vector of INCORRECT scores
      wyi_xi = incorrect_scores       #this should be a vector of the CORRECT score
      wy_xi = wj_xi - wyi_xi + delta  #core of the hinge loss formula
      losses = np.maximum(0, wy_xi)   #lower bound the losses at 0

      #get number of nonzero losses, and scale data vector by them to get the loss
      num_contributing_classifiers = np.count_nonzero(losses)
      #print(f"Num loss contributors: {num_contributing_classifiers}")
      g = -1 * x * num_contributing_classifiers   #NOTE the -, very important here, doesn't apply to other scores

      #add the gradient of the correct classifier to the gradient
      gradient[label] += g  #because arrays are 0-indexed, but the labels are 1-indexed
      # print(f"correct label: {label}")
      #print(f"gradient:\n{gradient}")
      #
      ##
      ### then, compute the gradient of the weights for each incorrect classifier
      ##
      #
      for j in range(len(scores_of_x)):

        #skip the correct score, since we already did it
        if j == label:
          continue
        wj_xi = scores_of_x[j]          #should be a vector containing the score of the CURRENT classifier
        wyi_xi = correct_score          #should be a vector containing the score of the CORRECT classifier
        wy_xi = wj_xi - wyi_xi + delta  #core of the hinge loss formula
        loss = np.maximum(0, wy_xi)   #lower bound the loss at 0

        #get whether this classifier contributed to the loss, and scale the data vector by that to get the gradient
        contributed_to_loss = 0
        if loss >  0:
          contributed_to_loss = 1

        g = x * contributed_to_loss        #either times 1 or times 0

        #add the gradient of the incorrect classifier to the gradient
        gradient[j] += g

    #divide the gradient by number of examples to get the average gradient
    return gradient / len(X)

  def fit(self, X, y, epochs = 1000, batch_size = 256, lr=1e-2, verbose=True):
    #gradient descent loop
    for epoch in range(epochs):
      self.history.append({'epoch': epoch})

      #create a batch of samples to calculate the gradient
      #NOTE: this significantly boosts the speed of training
      indices = np.random.choice(len(X), batch_size, replace=False)
      X_batch = X.iloc[indices]
      y_batch = y.iloc[indices]
     
      X_batch = X_batch.to_numpy()
      y_batch = y_batch.to_numpy()

      #evaluate class scores on training set
      predictions = self.predict(X_batch)
      predicted_classes = np.argmax(predictions, axis=1)

      #compute the loss: average hinge loss
      loss = self.loss(predictions, y_batch)
      self.history[-1]['loss'] = loss

      #compute accuracy on the test set, for an intuitive metric
      accuracy = np.mean(predicted_classes == y_batch)
      self.history[-1]['accuracy'] = accuracy

#print progress
      if epoch%50 == 0 and verbose:
        print(f"Epoch: {epoch} | Loss: {loss} | Accuracy: {accuracy} | LR: {lr} \n")

      #compute the gradient on the scores assigned by the classifier
      gradient = self.gradient(predictions, X_batch, y_batch)
     
      #backpropagate the gradient to the weights + bias
      step = gradient * lr

      #perform a parameter update, in the negative??? direction of the gradient
      self.weights += step

Это моя реализация. Метод fit() — это метод, который обучает веса переданных данных. Я нахожусь на этапе, когда потери имеют тенденцию уменьшаться от одной итерации к другой.
Но проблема то есть точность падает до нуля, даже когда потери уменьшаются.
Я знаю, что они не связаны напрямую, но разве моя точность не должна обычно расти по мере уменьшения потерь? Это заставляет меня думать, что я сделал что-то не так в методах loss() и градиент(). Но я не могу найти, где я ошибся. Кроме того, иногда мои потери увеличиваются от одной эпохи к другой. Возможно, это повлияло на мою пакетную оценку градиента, но я не уверен.
Вот ссылка на мой блокнот Jupyter, который позволит вам запустить мой код в его текущем состоянии. :
https://colab.research.google.com/drive ... TUQlscWksP
И вот ссылка на набор данных, который я использую: https: //www.kaggle.com/datasets/taweilo/fish-sp ... -data/code

Подробнее здесь: https://stackoverflow.com/questions/790 ... with-hinge

1728076923

Anonymous

Я пытаюсь реализовать и обучить многоклассовый классификатор SVM с нуля, используя Python и numpy в блокнотах Jupyter.
Я использую курс CS231n в качестве базы знаний, особенно эта страница: https://cs231n.github.io/optimization-1/, где обсуждается градиентный спуск.   Я реализовал класс SVM, который, по моему мнению, находится на правильном пути.
Вот базовый профиль этого класса:
[code]class SVM:
  def __init__(self):
    self.weights = np.random.randn(len(labels), X_train.shape[1]) * 0.1
    self.history = []

  def predict(self, X):
    '''
    returns class predictions in np array of size
    n x num_classes, where n is the number of examples in X
    '''

    #matrix multiplication to apply weights to X
    bounds = self.weights @ X.T

    #return the predictions
    return np.array(bounds).T

  def loss(self, scores, y, delta=1):
'''computes the loss'''
    #calculate and return the loss for a prediction and corresponding truth label
    #hinge loss in this case
    total_loss = 0

    #compute loss for each example...
    for i in range(len(scores)):
      #extract values for this example
      scores_of_x = scores[i]
      label = y[i]
      correct_score = scores_of_x[label]
      incorrect_scores = np.concatenate((scores_of_x[:label], scores_of_x[label+1:]))

      #use the scores for example x to compute the loss at x
      wj_xi = correct_score           #these should be a vector of INCORRECT scores
      wyi_xi = incorrect_scores       #this should be a vector of the CORRECT score
      wy_xi = wj_xi - wyi_xi + delta  #core of the hinge loss formula
      losses = np.maximum(0, wy_xi)   #lower bound the losses at 0
      loss = np.sum(losses)           #sum the losses

      #add to the total loss
      total_loss += loss

    #return the loss
    avg_loss = total_loss / len(scores)
    return avg_loss

  def gradient(self, scores, X, y, delta=1):
'''computes the gradient'''
    #calculate the loss and the gradient of the loss function
    #gradient of hinge loss function
    gradient = np.zeros(self.weights.shape)

    #calculate the gradient in each example in x
    for i in range(len(X)):
      #extract values for this example
      scores_of_x = scores[i]
      label = y[i]
      x = X[i]
      correct_score = scores_of_x[label]
      incorrect_scores = np.concatenate((scores_of_x[:label], scores_of_x[label+1:]))

      #
      ##
      ### start by computing the gradient of the weights of the correct classifier
      ##
      #
      wj_xi = correct_score           #these should be a vector of INCORRECT scores
      wyi_xi = incorrect_scores       #this should be a vector of the CORRECT score
      wy_xi = wj_xi - wyi_xi + delta  #core of the hinge loss formula
      losses = np.maximum(0, wy_xi)   #lower bound the losses at 0

      #get number of nonzero losses, and scale data vector by them to get the loss
      num_contributing_classifiers = np.count_nonzero(losses)
      #print(f"Num loss contributors: {num_contributing_classifiers}")
      g = -1 * x * num_contributing_classifiers   #NOTE the -, very important here, doesn't apply to other scores

      #add the gradient of the correct classifier to the gradient
      gradient[label] += g  #because arrays are 0-indexed, but the labels are 1-indexed
      # print(f"correct label: {label}")
      #print(f"gradient:\n{gradient}")
      #
      ##
      ### then, compute the gradient of the weights for each incorrect classifier
      ##
      #
      for j in range(len(scores_of_x)):

        #skip the correct score, since we already did it
        if j == label:
          continue
        wj_xi = scores_of_x[j]          #should be a vector containing the score of the CURRENT classifier
        wyi_xi = correct_score          #should be a vector containing the score of the CORRECT classifier
        wy_xi = wj_xi - wyi_xi + delta  #core of the hinge loss formula
        loss = np.maximum(0, wy_xi)   #lower bound the loss at 0

        #get whether this classifier contributed to the loss, and scale the data vector by that to get the gradient
        contributed_to_loss = 0
        if loss >  0:
          contributed_to_loss = 1

        g = x * contributed_to_loss        #either times 1 or times 0

        #add the gradient of the incorrect classifier to the gradient
        gradient[j] += g

    #divide the gradient by number of examples to get the average gradient
    return gradient / len(X)

  def fit(self, X, y, epochs = 1000, batch_size = 256, lr=1e-2, verbose=True):
    #gradient descent loop
    for epoch in range(epochs):
      self.history.append({'epoch': epoch})

      #create a batch of samples to calculate the gradient
      #NOTE: this significantly boosts the speed of training
      indices = np.random.choice(len(X), batch_size, replace=False)
      X_batch = X.iloc[indices]
      y_batch = y.iloc[indices]
     
      X_batch = X_batch.to_numpy()
      y_batch = y_batch.to_numpy()

      #evaluate class scores on training set
      predictions = self.predict(X_batch)
      predicted_classes = np.argmax(predictions, axis=1)

      #compute the loss: average hinge loss
      loss = self.loss(predictions, y_batch)
      self.history[-1]['loss'] = loss

      #compute accuracy on the test set, for an intuitive metric
      accuracy = np.mean(predicted_classes == y_batch)
      self.history[-1]['accuracy'] = accuracy

#print progress
      if epoch%50 == 0 and verbose:
        print(f"Epoch: {epoch} | Loss: {loss} | Accuracy: {accuracy} | LR: {lr} \n")

      #compute the gradient on the scores assigned by the classifier
      gradient = self.gradient(predictions, X_batch, y_batch)
     
      #backpropagate the gradient to the weights + bias
      step = gradient * lr

      #perform a parameter update, in the negative??? direction of the gradient
      self.weights += step
[/code]
Это моя реализация.  Метод fit() — это метод, который обучает веса переданных данных. Я нахожусь на этапе, когда потери имеют тенденцию уменьшаться от одной итерации к другой.
Но проблема то есть точность падает до нуля, даже когда потери уменьшаются.
Я знаю, что они не связаны напрямую, но разве моя точность не должна обычно расти по мере уменьшения потерь?  Это заставляет меня думать, что я сделал что-то не так в методах loss() и градиент().  Но я не могу найти, где я ошибся.  Кроме того, иногда мои потери увеличиваются от одной эпохи к другой.  Возможно, это повлияло на мою пакетную оценку градиента, но я не уверен.
Вот ссылка на мой блокнот Jupyter, который позволит вам запустить мой код в его текущем состоянии. :
https://colab.research.google.com/drive/12z4DevKDicmT4iE6AlMGrRiN6He8R9_4#scrollTo=uBTUQlscWksP
И вот ссылка на набор данных, который я использую: https: //www.kaggle.com/datasets/taweilo/fish-species-sampling-weight-and-height-data/code 

Подробнее здесь: [url]https://stackoverflow.com/questions/79055573/what-is-wrong-with-my-gradient-descent-implementation-svm-classifier-with-hinge[/url]

Ответить Пред. тема След. тема

1 сообщение • Страница 1 из 1

Быстрый ответ

Заголовок:

Имя пользователя:

Изменение регистра текста:

Смайлики

Ещё смайлики…

К этому ответу прикреплено по крайней мере одно вложение.

Если вы не хотите добавлять вложения, оставьте поля пустыми. Можно прикреплять файлы, перетаскивая их в окно сообщения.

Максимально разрешённый размер вложения: 15 МБ.

Имя файла:

Комментарий к файлу:

Имя файла	Комментарий к файлу	Размер	Статус

Похожие темы

Ответы

Просмотры

Последнее сообщение

Что не так с моей реализацией градиентного спуска (классификатор SVM с потерей шарнира)

Последнее сообщение Anonymous « 04 окт 2024, 23:03
Добавлено в форуме Python

Anonymous » 04 окт 2024, 23:03 » в форуме Python

В последнее время я пытаюсь узнать как можно больше об искусственном интеллекте и машинном обучении. Часть этого пути для меня заключалась в попытке реализовать многие системы, общие для задач машинного обучения, с «нуля», используя Python и...

0 Ответы

11 Просмотры

Последнее сообщение Anonymous
04 окт 2024, 23:03
Что не так с моей реализацией градиентного спуска (классификатор SVM с потерей шарнира)

Последнее сообщение Anonymous « 23 окт 2024, 20:05
Добавлено в форуме Python

Anonymous » 23 окт 2024, 20:05 » в форуме Python

Я пытаюсь реализовать и обучить многоклассовый классификатор SVM с нуля, используя Python и numpy в блокнотах Jupyter.
Я использую курс CS231n в качестве базы знаний, особенно эта страница: где обсуждается градиентный спуск. Я реализовал класс SVM,...

0 Ответы

12 Просмотры

Последнее сообщение Anonymous
23 окт 2024, 20:05
Почему моя функция градиентного спуска дает мне большие отрицательные значения?

Последнее сообщение Anonymous « 22 сен 2023, 09:50
Добавлено в форуме Python

Anonymous » 22 сен 2023, 09:50 » в форуме Python

Я пытаюсь запрограммировать градиентный спуск на Python. Первый код ниже отображает функцию ошибок для случаев 2D (wx+b) и 1D(wx). Второй код — это моя функция градиентного спуска, которая сохраняется как отдельная функция, то есть не в основном...

0 Ответы

59 Просмотры

Последнее сообщение Anonymous
22 сен 2023, 09:50
Почему моя функция градиентного спуска для составления полиномиального уравнения для sinx не может работать

Последнее сообщение Гость « 24 сен 2023, 10:18
Добавлено в форуме Python

Гость » 24 сен 2023, 10:18 » в форуме Python

Мне нужно написать программу, которая будет использовать градиентный спуск, чтобы подогнать полиномиальное уравнение к sin(x). И я не могу не использовать автоград pytorch, поэтому мне нужно манипулировать собой.

Я проверял свой код много раз, но...

0 Ответы

50 Просмотры

Последнее сообщение Гость
24 сен 2023, 10:18
Многомерная линейная регрессия с ошибкой градиентного спуска

Последнее сообщение Anonymous « 13 май 2024, 20:44
Добавлено в форуме Python

Anonymous » 13 май 2024, 20:44 » в форуме Python

Я следую руководству из этого видео на YouTube ( lCOHri09YmM ), но получаю сообщение об ошибке «в вычитании coeff = coeff - der обнаружено недопустимое значение», и тогда мои окончательные значения коэффициентов будут . Ниже мой код
#Gradient...

0 Ответы

47 Просмотры

Последнее сообщение Anonymous
13 май 2024, 20:44

Вернуться в «Python»