When I train my CNN model, the loss and accuracy do not improve


Post by Anonymous »

I wrote MLP and CNN code without a DL framework.
I confirmed that my MLP (ThreeNN) trains well.
However, my CNN does not improve its accuracy or loss at all, even after training.
I checked the input images and normalized them, verified every operation, checked the matrix dimensions, and changed the learning rate and the initial weight values, all to no effect.
I don't know what the problem is. Please help me.
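To illustrate the kind of per-operation check I mean, here is a minimal finite-difference sketch. It is self-contained, and all names in it are local to the sketch, not my classes below:

Code: Select all

import numpy as np

# Hypothetical sketch: finite-difference gradient check for one weight of a
# linear layer followed by softmax + cross-entropy (mean over the batch).
rng = np.random.default_rng(0)
x = rng.standard_normal((4, 6))        # batch of 4 samples, 6 features
W = rng.standard_normal((6, 3)) * 0.1  # 3 classes
t = np.eye(3)[rng.integers(0, 3, 4)]   # one-hot targets

def loss(W):
    z = x @ W
    p = np.exp(z - z.max(axis=1, keepdims=True))
    p /= p.sum(axis=1, keepdims=True)
    return -np.sum(t * np.log(p + 1e-8)) / x.shape[0]

# analytic gradient of softmax + CE w.r.t. W is x.T @ (p - t) / batch_size
z = x @ W
p = np.exp(z - z.max(axis=1, keepdims=True))
p /= p.sum(axis=1, keepdims=True)
analytic = x.T @ (p - t) / x.shape[0]

# central difference on a single entry of W
eps, (i, j) = 1e-5, (2, 1)
Wp, Wm = W.copy(), W.copy()
Wp[i, j] += eps
Wm[i, j] -= eps
numeric = (loss(Wp) - loss(Wm)) / (2 * eps)
print(analytic[i, j], numeric)  # the two values should match to several decimal places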
Below is my code:

Code: Select all

import numpy as np
import matplotlib.pyplot as plt
from dataloader import Dataloader
import sys

BATCH_SIZE_MLP = 16
EPOCH_MLP = 10
PATIENCE_MLP = 5

BATCH_SIZE_CNN = 8
EPOCH_CNN = 10
PATIENCE_CNN = 5

original_stdout = sys.stdout

class Linear():
    def __init__(self, input_size, output_size, lr=0.01):
        self.lr = lr
        self.weight = np.random.randn(input_size, output_size)*0.1
        self.bias = np.zeros((1, output_size))

    def forward(self, input):
        self.input = input
        return np.dot(self.input, self.weight) + self.bias

    def backward(self, output_gradient):
        input_gradient = np.dot(output_gradient, self.weight.T)
        weight_gradient = np.dot(self.input.T, output_gradient)

        # note: the weight update uses the batch-summed gradient while the bias
        # update uses the batch mean, so the two are scaled differently
        self.weight = self.weight - self.lr*weight_gradient
        # self.bias = self.bias - self.lr*output_gradient # batch = 1
        self.bias = self.bias - self.lr*np.mean(output_gradient, axis=0, keepdims=True) # batch > 1
        return input_gradient

class ReLU():
    def forward(self, input):
        self.input = input
        return np.maximum(0, input)

    def backward(self, output_gradient):
        grad = self.input > 0
        return grad * output_gradient

class SoftMax():
    def forward(self, input):
        exp_val = np.exp(np.clip(input - np.max(input, axis=1, keepdims=True), -700, None))
        prob = exp_val / np.sum(exp_val, axis=1, keepdims=True)
        self.output = prob
        return self.output

    def backward(self, output_gradient):
        return self.output - output_gradient
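    # backward implements the fused softmax + cross-entropy shortcut: it returns
    # self.output - <argument>, so it only equals the true logit gradient
    # (softmax_output - ground_truth) when the one-hot labels themselves are
    # passed in, as ThreeNN.backward does with self.gt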

class CEloss():
    def forward(self, expected, ground_truth):
        # loss = -np.sum(ground_truth * np.log(expected)) # batch = 1
        loss = -np.sum(ground_truth * np.log(expected + 1e-8)) / ground_truth.shape[0] # batch > 1
        return loss

    def backward(self, expected, ground_truth):
        return expected - ground_truth

class ThreeNN():
    def __init__(self, input_size, output_size, lr=0.01):
        self.layer1 = Linear(input_size, 128, lr=lr)
        self.activation1 = ReLU()
        self.layer2 = Linear(128, 64, lr=lr)
        self.activation2 = ReLU()
        self.layer3 = Linear(64, output_size, lr=lr)
        self.softmax = SoftMax()
        self.loss_function = CEloss()

    def forward(self, input, ground_truth):
        self.input = input
        self.gt = ground_truth

        self.output1 = self.layer1.forward(self.input)
        self.activated1 = self.activation1.forward(self.output1)
        self.output2 = self.layer2.forward(self.activated1)
        self.activated2 = self.activation2.forward(self.output2)
        self.output3 = self.layer3.forward(self.activated2)
        self.softmax_output = self.softmax.forward(self.output3)

        self.loss = self.loss_function.forward(self.softmax_output, ground_truth)
        return self.softmax_output, self.loss

    def backward(self):
        # this value is overwritten on the next line and never used
        output_gradient = self.loss_function.backward(self.softmax_output, self.gt)

        # passing the labels makes SoftMax.backward return softmax_output - gt,
        # the correct combined softmax + cross-entropy gradient
        output_gradient = self.softmax.backward(self.gt)
        output_gradient = self.layer3.backward(output_gradient)
        output_gradient = self.activation2.backward(output_gradient)
        output_gradient = self.layer2.backward(output_gradient)
        output_gradient = self.activation1.backward(output_gradient)
        output_gradient = self.layer1.backward(output_gradient)

        return output_gradient

#################################################

# Loss
train_loss_history_mlp = []
test_loss_history_mlp = []

def train_MLP(model, train_loader_MLP, test_loader_MLP, epochs=EPOCH_MLP):
    global best_test_loss, patience_counter
    best_test_loss = float('inf')
    patience_counter = 0

    for epoch in range(epochs):
        total_train_loss = 0
        correct_train = 0
        total_train = 0

        # Training
        for batch_images, batch_labels in train_loader_MLP:
            batch_images = batch_images.reshape(batch_images.shape[0], -1)

            output, loss = model.forward(batch_images, batch_labels)
            model.backward()
            total_train_loss += loss

            predictions = np.argmax(output, axis=1)
            correct_train += np.sum(predictions == np.argmax(batch_labels, axis=1))
            total_train += batch_images.shape[0]

        train_accuracy = correct_train / total_train
        average_train_loss = total_train_loss / total_train
        train_loss_history_mlp.append(average_train_loss)

        # Test
        total_test_loss, test_accuracy = test_loss_MLP(model, test_loader_MLP)

        print(f'Epoch {epoch+1}/{epochs}, Train Loss: {average_train_loss:.4f}, Train Accuracy: {train_accuracy * 100:.2f}%, Test Loss: {total_test_loss:.4f}, Test Accuracy: {test_accuracy * 100:.2f}%')

        if total_test_loss < best_test_loss:
            best_test_loss = total_test_loss
            patience_counter = 0
        else:
            patience_counter += 1

        if patience_counter > PATIENCE_MLP:
            print(f"Early stopping at epoch {epoch+1}")
            break

def test_loss_MLP(model, test_loader_MLP):
    total_loss = 0
    correct = 0
    total = 0

    for batch_images, batch_labels in test_loader_MLP:
        batch_images = batch_images.reshape(batch_images.shape[0], -1)

        output, loss = model.forward(batch_images, batch_labels)
        total_loss += loss * batch_images.shape[0]
        correct += np.sum(np.argmax(output, axis=1) == np.argmax(batch_labels, axis=1))
        total += batch_images.shape[0]

    average_loss = total_loss / total
    test_loss_history_mlp.append(average_loss)

    accuracy = correct / total
    return average_loss, accuracy

def plot_loss_graph_MLP():
    plt.figure()
    plt.plot(train_loss_history_mlp, label='Train Loss')
    plt.plot(test_loss_history_mlp, label='Test Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.title('Training and Test Loss')
    plt.legend()
    plt.show()

def plot_confusion_matrix_MLP(model, test_loader):
    num_classes = 10
    confusion_matrix = np.zeros((num_classes, num_classes), dtype=int)

    for batch_images, batch_labels in test_loader:
        batch_images = batch_images.reshape(batch_images.shape[0], -1)
        output, _ = model.forward(batch_images, batch_labels)

        predictions = np.argmax(output, axis=1)
        true_labels = np.argmax(batch_labels, axis=1)

        for t, p in zip(true_labels, predictions):
            confusion_matrix[t, p] += 1

    confusion_matrix_normalized = confusion_matrix.astype('float') / confusion_matrix.sum(axis=1)[:, np.newaxis]

    fig, ax = plt.subplots()
    cax = ax.matshow(confusion_matrix_normalized, cmap='Blues')
    fig.colorbar(cax)

    ax.set_xlabel('Predicted Label')
    ax.xaxis.set_label_position('bottom')
    ax.xaxis.tick_bottom()
    ax.set_ylabel('True Label')

    for i in range(num_classes):
        for j in range(num_classes):
            ax.text(j, i, f'{confusion_matrix_normalized[i, j]:.2f}', ha='center', va='center', color='black')

    plt.title('Confusion Matrix with Probabilities')
    plt.show()

def plot_top3_images_with_probability_MLP(model, test_loader):
    num_classes = 10
    top3_images = {i: [] for i in range(num_classes)}

    for batch_images, batch_labels in test_loader:
        batch_images = batch_images.reshape(batch_images.shape[0], -1)
        output, _ = model.forward(batch_images, batch_labels)

        for j, img_output in enumerate(output):
            img_prob = np.max(img_output)
            predicted_class = np.argmax(img_output)

            if len(top3_images[predicted_class]) < 3:
                top3_images[predicted_class].append((batch_images[j], img_prob))

    for cls in top3_images:
        top3_images[cls] = sorted(top3_images[cls], key=lambda x: x[1], reverse=True)

    fig, axes = plt.subplots(num_classes, 3, figsize=(8, 8))
    for cls, images in top3_images.items():
        for idx, (img, prob) in enumerate(images):
            ax = axes[cls, idx]
            ax.imshow(img.reshape(28, 28), cmap='gray')
            ax.set_title(f"{prob:.3f}")
            ax.axis('off')

    plt.tight_layout()
    plt.show()

##############################################################

class Conv():
    def __init__(self, input_channels, kernel_size, output_channels, stride=1, lr=0.01):
        self.input_channels = input_channels
        self.kernel_size = kernel_size
        self.output_channels = output_channels
        self.stride = stride
        self.lr = lr

        self.weight = np.random.randn(output_channels, input_channels, kernel_size, kernel_size)*0.001
        self.bias = np.zeros(output_channels)

    def forward(self, input):
        self.input = input
        batch_size, input_channels, input_height, input_width = self.input.shape # MNIST dataset -> 1 channel

        output_height = (input_height - self.kernel_size) // self.stride + 1
        output_width = (input_width - self.kernel_size) // self.stride + 1

        output = np.zeros((batch_size, self.output_channels, output_height, output_width))
        for b in range(batch_size):
            for oc in range(self.output_channels):
                for h in range(output_height):
                    for w in range(output_width):
                        _h = h * self.stride
                        _w = w * self.stride
                        region = input[b, :, _h:_h+self.kernel_size, _w:_w+self.kernel_size]
                        output[b, oc, h, w] = np.sum(region*self.weight[oc]) + self.bias[oc]

        return output

    def backward(self, output_gradient):
        batch_size, input_channels, input_height, input_width = self.input.shape # MNIST dataset -> 1 channel
        output_height = (input_height - self.kernel_size) // self.stride + 1
        output_width = (input_width - self.kernel_size) // self.stride + 1

        input_gradient = np.zeros_like(self.input)
        weight_gradient = np.zeros_like(self.weight)
        bias_gradient = np.zeros_like(self.bias)

        # hard clip to keep the incoming gradient from exploding
        output_gradient = np.clip(output_gradient, -8.0, 8.0)
        for b in range(batch_size):
            for oc in range(self.output_channels):
                for h in range(output_height):
                    for w in range(output_width):
                        _h = h * self.stride
                        _w = w * self.stride
                        region = self.input[b, :, _h:_h + self.kernel_size, _w:_w + self.kernel_size]

                        weight_gradient[oc] += region * output_gradient[b, oc, h, w]
                        bias_gradient[oc] += output_gradient[b, oc, h, w]
                        input_gradient[b, :, _h:_h + self.kernel_size, _w:_w + self.kernel_size] += self.weight[oc] * output_gradient[b, oc, h, w]

        weight_gradient /= batch_size
        bias_gradient /= batch_size

        self.weight -= self.lr * weight_gradient
        self.bias -= self.lr * bias_gradient

        return input_gradient

class MaxPooling():
    def __init__(self, kernel_size=2, stride=2):
        self.kernel_size = kernel_size
        self.stride = stride

    def forward(self, input):
        self.input = input
        batch_size, input_channels, input_height, input_width = input.shape

        output_height = (input_height - self.kernel_size) // self.stride + 1
        output_width = (input_width - self.kernel_size) // self.stride + 1

        output = np.zeros((batch_size, input_channels, output_height, output_width))
        self.max_indices = np.zeros((batch_size, input_channels, output_height, output_width, 2), dtype=int)

        for b in range(batch_size):
            for ic in range(input_channels):
                for h in range(output_height):
                    for w in range(output_width):
                        _h = h * self.stride
                        _w = w * self.stride
                        region = input[b, ic, _h:_h+self.kernel_size, _w:_w+self.kernel_size]
                        output[b, ic, h, w] = np.max(region)

                        max_pos = np.unravel_index(np.argmax(region), region.shape)
                        self.max_indices[b, ic, h, w] = (_h + max_pos[0], _w + max_pos[1])

        # print("Pool output min:", np.min(output), "max:", np.max(output))
        return output

    def backward(self, output_gradient):
        batch_size, input_channels, input_height, input_width = self.input.shape
        input_gradient = np.zeros_like(self.input)

        output_height = (input_height - self.kernel_size) // self.stride + 1
        output_width = (input_width - self.kernel_size) // self.stride + 1

        for b in range(batch_size):
            for ic in range(input_channels):
                for h in range(output_height):
                    for w in range(output_width):
                        _h, _w = self.max_indices[b, ic, h, w]
                        input_gradient[b, ic, _h, _w] = output_gradient[b, ic, h, w]

        # print("Gradient min:", np.min(output_gradient), "max:", np.max(output_gradient))
        return input_gradient

class CNN():
    def __init__(self, lr=0.01):
        self.conv1 = Conv(input_channels=1, kernel_size=3, output_channels=8, stride=1, lr=lr)
        self.relu1 = ReLU()
        self.maxpool1 = MaxPooling(kernel_size=2, stride=2)
        self.conv2 = Conv(input_channels=8, kernel_size=3, output_channels=16, stride=1, lr=lr)
        self.relu2 = ReLU()
        self.maxpool2 = MaxPooling(kernel_size=2, stride=2)
        self.linear = Linear(input_size=16 * 5 * 5, output_size=10, lr=lr)
        self.softmax = SoftMax()
        self.loss_function = CEloss()
        self.f_flag = True
        self.b_flag = True

    def forward(self, input, ground_truth):
        self.input = input
        self.gt = ground_truth

        self.conv1_output = self.conv1.forward(input)
        self.relu1_output = self.relu1.forward(self.conv1_output)
        self.maxpool1_output = self.maxpool1.forward(self.relu1_output)
        self.conv2_output = self.conv2.forward(self.maxpool1_output)
        self.relu2_output = self.relu2.forward(self.conv2_output)
        self.maxpool2_output = self.maxpool2.forward(self.relu2_output)

        # Flatten
        self.flatten_output = self.maxpool2_output.reshape(self.maxpool2_output.shape[0], -1)

        # Fully connected layer
        self.linear_output = self.linear.forward(self.flatten_output)
        self.softmax_output = self.softmax.forward(self.linear_output)

        # Loss
        self.loss = self.loss_function.forward(self.softmax_output, ground_truth)

        return self.softmax_output, self.loss

    def backward(self):
        output_gradient = self.loss_function.backward(self.softmax_output, self.gt)
        # note: unlike ThreeNN.backward, the loss gradient (softmax_output - gt) is
        # fed into SoftMax.backward, which returns self.output - output_gradient,
        # i.e. exactly gt here rather than the error signal
        output_gradient = self.softmax.backward(output_gradient)
        output_gradient = self.linear.backward(output_gradient)
        output_gradient = output_gradient.reshape(output_gradient.shape[0], 16, 5, 5)
        output_gradient = self.maxpool2.backward(output_gradient)
        output_gradient = self.relu2.backward(output_gradient)
        output_gradient = self.conv2.backward(output_gradient)
        output_gradient = self.maxpool1.backward(output_gradient)
        output_gradient = self.relu1.backward(output_gradient)
        output_gradient = self.conv1.backward(output_gradient)

##################################################################################

# Loss
train_loss_history_cnn = []
val_loss_history_cnn = []
test_loss_history_cnn = []

def train_CNN(model, train_loader_CNN, test_loader_CNN, epochs=EPOCH_CNN):
    for epoch in range(epochs):
        total_loss = 0
        correct = 0
        total = 0
        batch_count = 0

        for batch_images, batch_labels in train_loader_CNN:
            batch_images = batch_images.reshape(batch_images.shape[0], 1, 28, 28)
            output, loss = model.forward(batch_images, batch_labels)
            model.backward()
            total_loss += loss

            predictions = np.argmax(output, axis=1)
            correct += np.sum(predictions == np.argmax(batch_labels, axis=1))
            total += batch_images.shape[0]

            batch_count += 1
            if batch_count % 10 == 0:
                accuracy = correct / total
                print(f'Epoch {epoch+1}, Batch {batch_count}, Loss: {loss:.4f}, Accuracy: {accuracy * 100:.2f}%')

        # Test
        test_loss, test_accuracy = test_loss_CNN(model, test_loader_CNN)
        print(f'Epoch {epoch+1} completed. Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy * 100:.2f}%')

def test_loss_CNN(model, test_loader_CNN):
    total_loss = 0
    correct = 0
    total = 0

    for batch_images, batch_labels in test_loader_CNN:
        # use the actual batch size so a final partial batch does not break the reshape
        batch_images = batch_images.reshape(batch_images.shape[0], 1, 28, 28)

        output, loss = model.forward(batch_images, batch_labels)
        total_loss += loss * batch_images.shape[0]
        correct += np.sum(np.argmax(output, axis=1) == np.argmax(batch_labels, axis=1))
        total += batch_images.shape[0]

    average_loss = total_loss / total
    test_loss_history_cnn.append(average_loss)

    # train_CNN unpacks two values, so return both loss and accuracy
    return average_loss, correct / total

def plot_loss_graph_CNN():
    plt.figure()
    plt.plot(train_loss_history_cnn, label='Train Loss')
    plt.plot(val_loss_history_cnn, label='Validation Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.title('Training and Validation Loss')
    plt.legend()
    plt.show()

def plot_confusion_matrix_CNN(model, test_loader):
    num_classes = 10
    confusion_matrix = np.zeros((num_classes, num_classes), dtype=int)

    for batch_images, batch_labels in test_loader:
        batch_images = batch_images.reshape(batch_images.shape[0], 1, 28, 28)
        # CNN.forward requires the labels and returns (output, loss)
        output, _ = model.forward(batch_images, batch_labels)

        predictions = np.argmax(output, axis=1)
        true_labels = np.argmax(batch_labels, axis=1)

        for t, p in zip(true_labels, predictions):
            confusion_matrix[t, p] += 1

    confusion_matrix_normalized = confusion_matrix.astype('float') / confusion_matrix.sum(axis=1)[:, np.newaxis]

    fig, ax = plt.subplots()
    cax = ax.matshow(confusion_matrix_normalized, cmap='Blues')
    fig.colorbar(cax)

    ax.set_xlabel('Predicted Label')
    ax.xaxis.set_label_position('bottom')
    ax.xaxis.tick_bottom()
    ax.set_ylabel('True Label')

    for i in range(num_classes):
        for j in range(num_classes):
            ax.text(j, i, f'{confusion_matrix_normalized[i, j]:.2f}', ha='center', va='center', color='black')

    plt.title('Confusion Matrix with Probabilities')
    plt.show()

def plot_top3_images_with_probability_CNN(model, test_loader):
    num_classes = 10
    top3_images = {i: [] for i in range(num_classes)}

    for batch_images, batch_labels in test_loader:
        batch_images = batch_images.reshape(batch_images.shape[0], 1, 28, 28)
        output, _ = model.forward(batch_images, batch_labels)

        for j, img_output in enumerate(output):
            img_prob = np.max(img_output)
            predicted_class = np.argmax(img_output)

            if len(top3_images[predicted_class]) < 3:
                top3_images[predicted_class].append((batch_images[j], img_prob))

    for cls in top3_images:
        top3_images[cls] = sorted(top3_images[cls], key=lambda x: x[1], reverse=True)

    fig, axes = plt.subplots(num_classes, 3, figsize=(8, 8))
    for cls, images in top3_images.items():
        for idx, (img, prob) in enumerate(images):
            ax = axes[cls, idx]
            ax.imshow(img.reshape(28, 28), cmap='gray')
            ax.set_title(f"{prob:.3f}")
            ax.axis('off')

    plt.tight_layout()
    plt.show()

################################################################################
################################################################################
# MLP - work well
# train_loader_MLP = Dataloader(path=r'D:\mnist', is_train=True, batch_size=BATCH_SIZE_MLP)
# test_loader_MLP = Dataloader(path=r'D:\mnist', is_train=False, batch_size=BATCH_SIZE_MLP)

# mlp = ThreeNN(input_size=784, output_size=10, lr=0.001)
# train_MLP(mlp, train_loader_MLP, test_loader_MLP, epochs=EPOCH_MLP)

# plot_loss_graph_MLP()
# plot_confusion_matrix_MLP(mlp, test_loader_MLP)
# plot_top3_images_with_probability_MLP(mlp, test_loader_MLP)

#######################################################################################
# CNN
train_loader_CNN = Dataloader(path=r'D:\mnist', is_train=True, batch_size=8)
test_loader_CNN = Dataloader(path=r'D:\mnist', is_train=False, batch_size=8)

cnn = CNN(lr=0.001)
train_CNN(cnn, train_loader_CNN, test_loader_CNN, epochs=EPOCH_CNN)

plot_loss_graph_CNN()
plot_confusion_matrix_CNN(cnn, test_loader_CNN)
plot_top3_images_with_probability_CNN(cnn, test_loader_CNN)
And the result...

Code: Select all

Epoch 1, Batch 10, Loss: 2.3024, Accuracy: 11.25%
Epoch 1, Batch 20, Loss: 2.3009, Accuracy: 12.50%
Epoch 1, Batch 30, Loss: 2.3047, Accuracy: 12.08%
Epoch 1, Batch 40, Loss: 2.3014, Accuracy: 12.50%
Epoch 1, Batch 50, Loss: 2.2977, Accuracy: 13.00%
Epoch 1, Batch 60, Loss: 2.3006, Accuracy: 12.71%
Epoch 1, Batch 70, Loss: 2.3041, Accuracy: 11.61%
Epoch 1, Batch 80, Loss: 2.3062, Accuracy: 11.09%
Epoch 1, Batch 90, Loss: 2.2906, Accuracy: 10.97%
Epoch 1, Batch 100, Loss: 2.2983, Accuracy: 11.00%
Epoch 1, Batch 110, Loss: 2.2973, Accuracy: 10.68%
Epoch 1, Batch 120, Loss: 2.3095, Accuracy: 10.42%
Epoch 1, Batch 130, Loss: 2.3062, Accuracy: 10.29%
Epoch 1, Batch 140, Loss: 2.2879, Accuracy: 10.09%
Epoch 1, Batch 150, Loss: 2.2963, Accuracy: 10.33%
Epoch 1, Batch 160, Loss: 2.2845, Accuracy: 10.08%
Epoch 1, Batch 170, Loss: 2.3183, Accuracy: 9.93%
Epoch 1, Batch 180, Loss: 2.2966, Accuracy: 10.21%
Epoch 1, Batch 190, Loss: 2.3196, Accuracy: 9.87%
Epoch 1, Batch 200, Loss: 2.3103, Accuracy: 10.06%
Epoch 1, Batch 210, Loss: 2.2947, Accuracy: 10.06%
Epoch 1, Batch 220, Loss: 2.2914, Accuracy: 9.77%
Epoch 1, Batch 230, Loss: 2.3152, Accuracy: 9.51%
Epoch 1, Batch 240, Loss: 2.2902, Accuracy: 9.32%
Epoch 1, Batch 250, Loss: 2.3189, Accuracy: 9.30%
Epoch 1, Batch 260, Loss: 2.3081, Accuracy: 9.18%
Epoch 1, Batch 270, Loss: 2.2892, Accuracy: 9.07%
Epoch 1, Batch 280, Loss: 2.3389, Accuracy: 9.11%
Epoch 1, Batch 290, Loss: 2.3417, Accuracy: 9.05%
Epoch 1, Batch 300, Loss: 2.2972, Accuracy: 9.00%
Epoch 1, Batch 310, Loss: 2.3116, Accuracy: 9.31%
Epoch 1, Batch 320, Loss: 2.2741, Accuracy: 9.30%
Epoch 1, Batch 330, Loss: 2.2649, Accuracy: 9.17%
Epoch 1, Batch 340, Loss: 2.3019, Accuracy: 9.26%
Epoch 1, Batch 350, Loss: 2.2298, Accuracy: 9.46%
Epoch 1, Batch 360, Loss: 2.3119, Accuracy: 9.51%
It goes on like this.
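The loss stays pinned near ln(10) ≈ 2.3026, which is exactly the loss of uniform probabilities over 10 classes, so the network never moves away from random guessing. To isolate whether the backward pass updates the weights at all, a single-batch overfit test can help; this is only a sketch, assuming the CNN class and Dataloader from the code above:

Code: Select all

# Hypothetical sanity check: try to drive the loss to ~0 on one fixed batch.
# Reuses the CNN class and train_loader_CNN defined above.
batch_images, batch_labels = next(iter(train_loader_CNN))
batch_images = batch_images.reshape(batch_images.shape[0], 1, 28, 28)

model = CNN(lr=0.01)
for step in range(100):
    _, loss = model.forward(batch_images, batch_labels)
    model.backward()
    if step % 10 == 0:
        print(step, loss)
# a correct implementation overfits a single batch to near-zero loss;
# staying at ~2.30 points at the gradient flow, not the data or the learning rate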

More details here: https://stackoverflow.com/questions/791 ... ot-improve