Я хочу, чтобы моя модель определяла угол (в 360 классах) с использованием самостоятельно созданных изображений текста.
Чтобы получить больше данных для обучения, изображения обучающего набора генерируются заново каждую эпоху с новым случайным поворотом.
Однако кажется, что модель не обучается, поскольку она предсказывает один и тот же класс для каждого изображения. Я пробовал менять размер пакета, оптимизатор, скорость обучения, более сложные модели, но ничего не помогло решить проблему.
В этом примере я использую 500 обучающих образцов, 50 проверочных образцов и 10 тестовых образцов. Я пробовал использовать до 2000 обучающих образцов, однако возникла та же проблема.
import numpy as np
from keras import backend as K
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.layers import Input, Dense, Flatten
from keras.models import Model
from keras.optimizers import Adam
from keras.preprocessing import image as keras_image
from keras.utils import Sequence
from keras.utils.np_utils import to_categorical
from PIL import Image
import math
from random import randint
import os
from numpy import argmax
from create_text_images import create_data
def get_dataset(directory, name):
    """
    Load every picture found in `directory`, resized to 262x262.

    Each file listed by `os.listdir` is opened with PIL, resized and
    converted with `keras_image.img_to_array`; the per-picture arrays are
    stacked into one numpy array, which is returned.
    `name` is accepted for the caller's convenience but is not used.
    """
    def load_resized(file_name):
        # The context manager closes the file handle once the resized copy exists.
        with Image.open(os.path.join(directory, file_name)) as picture:
            return keras_image.img_to_array(picture.resize((262, 262)))

    return np.array([load_resized(entry) for entry in os.listdir(directory)])
def rotate_pictures(X_images):
    """
    Build one rotated, cropped sample from every input picture.

    Each picture is rotated by a random whole-degree angle drawn from
    0..359 (inclusive), cropped to the central 224x224 region and divided
    by 255.

    Returns a tuple (x, y): x is the array of processed pictures and y is
    the one-hot encoding (360 classes) of the angle applied to each picture.
    """
    half_side = 112  # half of the 224-pixel crop side
    samples = []
    angles = []
    for picture in X_images:
        pil_picture = keras_image.array_to_img(picture)
        angle = randint(0, 359)
        rotated = pil_picture.rotate(angle, resample=Image.BICUBIC)
        width, height = rotated.size
        center_x, center_y = width // 2, height // 2
        crop_box = (
            center_x - half_side,
            center_y - half_side,
            center_x + half_side,
            center_y + half_side,
        )
        samples.append(keras_image.img_to_array(rotated.crop(crop_box)) / 255.0)
        angles.append(angle)
    one_hot_angles = to_categorical(angles, num_classes=360)
    return np.array(samples), np.array(one_hot_angles)
class data_generator(Sequence):
    """
    Keras `Sequence` serving batches of rotated pictures and their angles.

    On creation the source pictures are passed through `rotate_pictures`
    to produce the x/y data. When `name` is "train", the pictures are
    rotated afresh at the end of every epoch; any other name keeps the
    data produced at creation time.

    Arguments:
        images: the un-rotated source pictures handed to `rotate_pictures`.
        name: dataset label; only the value "train" changes behavior.
        batch_size: number of samples per batch.
    """

    def __init__(self, images, name, batch_size):
        super().__init__()
        self.images = images
        self.name = name
        self.batch_size = batch_size
        # Rotate exactly once here. on_epoch_end() is deliberately not
        # called from the constructor: for the "train" set it would redo
        # this work immediately and discard the result.
        self.x, self.y = rotate_pictures(self.images)
        self.indices = np.arange(self.x.shape[0])

    def __len__(self):
        """Return the number of batches per epoch (the last one may be short)."""
        return math.ceil(self.x.shape[0] / self.batch_size)

    def __getitem__(self, idx):
        """Return the (x, y) pair for batch number `idx`."""
        inds = self.indices[idx * self.batch_size:(idx + 1) * self.batch_size]
        return self.x[inds], self.y[inds]

    def on_epoch_end(self):
        """Re-rotate the source pictures, but only for the "train" dataset."""
        if self.name == "train":
            self.x, self.y = rotate_pictures(self.images)
def mse_angle(y_true, y_pred):
    """
    Mean absolute circular distance between true and predicted angles.

    Both arguments hold one score vector per sample; the angle of a sample
    is taken as the index of the largest entry in its vector. The distance
    wraps around at 360, so it never exceeds 180. Despite the function's
    name, nothing is squared: the result is the mean of the absolute
    distances.
    """
    true_angle = K.argmax(y_true)
    predicted_angle = K.argmax(y_pred)
    raw_gap = abs(true_angle - predicted_angle)
    # Fold gaps larger than 180 back onto the shorter way around the circle.
    circular_gap = 180 - abs(raw_gap - 180)
    return K.mean(K.cast(K.abs(circular_gap), K.floatx()))
# Directories that create_data() fills with generated text images.
train_dir = "train/"
val_dir = "val/"
test_dir = "test/"

# Training configuration.
number_of_epochs = 50
number_of_classes = 360
input_shape = (224, 224, 3)
activation_fn = 'softmax'
batch_size = 32

# Training data: re-rotated after every epoch by the generator.
create_data(train_dir, 500)
X_train = get_dataset(train_dir, "train")
train_generator = data_generator(X_train, "train", batch_size)

# Validation data: rotated once when the generator is created.
create_data(val_dir, 50)
X_val = get_dataset(val_dir, "val")
val_generator = data_generator(X_val, "val", batch_size)

# Test data: rotated once, kept as plain arrays for predict().
create_data(test_dir, 10)
X_test = get_dataset(test_dir, "test")
X_test, y_test = rotate_pictures(X_test)

# Four conv/max-pool stages followed by a dense classifier head.
input_tensor = Input(shape=input_shape)
x = Conv2D(32, (3, 3), activation='relu')(input_tensor)
x = MaxPooling2D((2, 2), strides=(2, 2))(x)
x = Conv2D(64, (3, 3), activation='relu')(x)
x = MaxPooling2D((2, 2), strides=(2, 2))(x)
x = Conv2D(128, (3, 3), activation='relu')(x)
x = MaxPooling2D((2, 2), strides=(2, 2))(x)
x = Conv2D(128, (3, 3), activation='relu')(x)
x = MaxPooling2D((2, 2), strides=(2, 2))(x)
x = Flatten()(x)
x = Dense(512, activation='relu')(x)
output_tensor = Dense(number_of_classes, activation=activation_fn)(x)

model = Model(input_tensor, output_tensor)
model.summary()

model.compile(
    loss='categorical_crossentropy',
    # A learning rate of 0.1 is roughly 100x Adam's usual step size; with
    # steps that large the weights tend to blow up in the first updates and
    # the softmax collapses to a single class. Use Adam's default of 0.001.
    optimizer=Adam(lr=0.001),
    metrics=[mse_angle]
)

history = model.fit(
    train_generator,
    epochs=number_of_epochs,
    validation_data=val_generator
)

model.save_weights('model_weights.h5')

# Compare the true and the predicted angle for every test picture.
predictions = model.predict(X_test)
for i, prediction in enumerate(predictions):
    angle = argmax(y_test[i])
    pred = argmax(prediction)
    print("for image {0} angle: {1}, pred: {2}".format(i, angle, pred))
Для запуска кода его необходимо поместить в каталог с тремя пустыми папками (val, train, test) и файлом create_text_images.py:
Я хочу, чтобы моя модель определяла угол (в 360 классах) с использованием самостоятельно созданных изображений текста. Чтобы получить больше данных для обучения, изображения обучающего набора генерируются заново каждую эпоху с новым случайным поворотом. Однако кажется, что модель не обучается, поскольку она предсказывает один и тот же класс для каждого изображения. Я пробовал менять размер пакета, оптимизатор, скорость обучения, более сложные модели, но ничего не помогло решить проблему.
В этом примере я использую 500 обучающих образцов, 50 проверочных образцов и 10 тестовых образцов. Я пробовал использовать до 2000 обучающих выборок, однако возникла та же проблема.
Это мой результат:
[code]Using TensorFlow backend. WARNING:tensorflow:From /home/lisa/.local/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:4070: The name tf.nn.max_pool is deprecated. Please use tf.nn.max_pool2d instead.
Model: "model_1" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= input_1 (InputLayer) (None, 224, 224, 3) 0 _________________________________________________________________ conv2d_1 (Conv2D) (None, 222, 222, 32) 896 _________________________________________________________________ max_pooling2d_1 (MaxPooling2 (None, 111, 111, 32) 0 _________________________________________________________________ conv2d_2 (Conv2D) (None, 109, 109, 64) 18496 _________________________________________________________________ max_pooling2d_2 (MaxPooling2 (None, 54, 54, 64) 0 _________________________________________________________________ conv2d_3 (Conv2D) (None, 52, 52, 128) 73856 _________________________________________________________________ max_pooling2d_3 (MaxPooling2 (None, 26, 26, 128) 0 _________________________________________________________________ conv2d_4 (Conv2D) (None, 24, 24, 128) 147584 _________________________________________________________________ max_pooling2d_4 (MaxPooling2 (None, 12, 12, 128) 0 _________________________________________________________________ flatten_1 (Flatten) (None, 18432) 0 _________________________________________________________________ dense_1 (Dense) (None, 512) 9437696 _________________________________________________________________ dense_2 (Dense) (None, 360) 184680 ================================================================= Total params: 9,863,208 Trainable params: 9,863,208 Non-trainable params: 0 _________________________________________________________________ 2019-11-06 11:08:47.885295: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA 2019-11-06 11:08:47.901431: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 3408000000 Hz 2019-11-06 11:08:47.902091: I 
tensorflow/compiler/xla/service/service.cc:168] XLA service 0x55f4487aac50 executing computations on platform Host. Devices: 2019-11-06 11:08:47.902139: I tensorflow/compiler/xla/service/service.cc:175] StreamExecutor device (0): , 2019-11-06 11:08:47.903354: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcuda.so.1 2019-11-06 11:08:47.921001: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:1005] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2019-11-06 11:08:47.921953: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1640] Found device 0 with properties: name: GeForce GTX 970 major: 5 minor: 2 memoryClockRate(GHz): 1.1775 pciBusID: 0000:01:00.0 2019-11-06 11:08:47.922112: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcudart.so.10.0 2019-11-06 11:08:47.922988: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcublas.so.10.0 2019-11-06 11:08:47.923739: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcufft.so.10.0 2019-11-06 11:08:47.923921: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcurand.so.10.0 2019-11-06 11:08:47.924921: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcusolver.so.10.0 2019-11-06 11:08:47.925684: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcusparse.so.10.0 2019-11-06 11:08:47.928111: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcudnn.so.7 2019-11-06 11:08:47.928199: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:1005] successful NUMA node read from SysFS had negative value (-1), but there must be at 
least one NUMA node, so returning NUMA node zero 2019-11-06 11:08:47.929103: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:1005] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2019-11-06 11:08:47.929818: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1763] Adding visible gpu devices: 0 2019-11-06 11:08:47.929844: I tensorflow/stream_executor/platform/default/dso_loader.cc:42] Successfully opened dynamic library libcudart.so.10.0 2019-11-06 11:08:47.976192: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1181] Device interconnect StreamExecutor with strength 1 edge matrix: 2019-11-06 11:08:47.976213: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1187] 0 2019-11-06 11:08:47.976219: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1200] 0: N 2019-11-06 11:08:47.976372: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:1005] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2019-11-06 11:08:47.977217: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:1005] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2019-11-06 11:08:47.978039: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:1005] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero 2019-11-06 11:08:47.978851: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1326] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 3466 MB memory) -> physical GPU (device: 0, name: GeForce GTX 970, pci bus id: 0000:01:00.0, compute capability: 5.2) 2019-11-06 11:08:47.980313: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x55f449158000 executing computations on platform CUDA. 
Devices: 2019-11-06 11:08:47.980326: I tensorflow/compiler/xla/service/service.cc:175] StreamExecutor device (0): GeForce GTX 970, Compute Capability 5.2 WARNING:tensorflow:From /home/lisa/.local/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:422: The name tf.global_variables is deprecated. Please use tf.compat.v1.global_variables instead.
[code]import numpy as np from keras import backend as K from keras.layers.convolutional import Conv2D, MaxPooling2D from keras.layers import Input, Dense, Flatten from keras.models import Model from keras.optimizers import Adam from keras.preprocessing import image as keras_image from keras.utils import Sequence from keras.utils.np_utils import to_categorical from PIL import Image import math from random import randint import os from numpy import argmax from create_text_images import create_data
def get_dataset(directory, name): """ Resize the pictures in the directory and return as a numpy array. """ X_train = [] for i, img_name in enumerate(os.listdir(directory)): img_path = os.path.join(directory, img_name) with Image.open(img_path) as img: img = img.resize((262, 262)) x = keras_image.img_to_array(img) X_train.append(x) X_train = np.array(X_train) return X_train
def rotate_pictures(X_images): """ Randomly rotate the picture, then crop it to size 224x224. Return the image as x normalized /255 and the rotation (converted to 360 categories) as y. """ X_train, y_train = [], [] for i, img in enumerate(X_images): img = keras_image.array_to_img(img) rotation = randint(0, 359) img = img.rotate(rotation, resample=Image.BICUBIC) w, h = img.size img = img.crop(((w//2 - 112),(h//2 - 112), (w//2 + 112), (h//2 + 112))) x = keras_image.img_to_array(img)/255.0 X_train.append(x) y_train.append(rotation) y_train = to_categorical(y_train, num_classes=360) X_train = np.array(X_train) y_train = np.array(y_train) return X_train, y_train
class data_generator(Sequence): """ On initiation, create x and y data with the rotated pictures and their rotation. If the dataset is 'train', then rotate original pictures again after every epoch. """ def __init__(self, images, name, batch_size): self.images = images self.name = name self.x, self.y = rotate_pictures(self.images) self.batch_size = batch_size self.indices = np.arange(self.x.shape[0]) self.on_epoch_end() def __len__(self): return math.ceil(self.x.shape[0] / self.batch_size) def __getitem__(self, idx): inds = self.indices[idx * self.batch_size:(idx + 1) * self.batch_size] batch_x = self.x[inds] batch_y = self.y[inds] return batch_x, batch_y def on_epoch_end(self): if self.name == "train": self.x, self.y = rotate_pictures(self.images)
def mse_angle(y_true, y_pred): """ Calculate the mean difference between the true angles and the predicted angles. Each angle is represented as a binary vector. """ a = K.argmax(y_true) b = K.argmax(y_pred) diff = 180 - abs(abs(a - b) - 180) return K.mean(K.cast(K.abs(diff), K.floatx()))
Я обучил модель машинного обучения с помощью TensorFlow/Keras для классификации арабских символов, и она достигает точности около 91 % в наборе тестовых данных. Однако когда я подключаю эту модель к своему приложению с графическим интерфейсом...
Я обучил модель машинного обучения с помощью TensorFlow/Keras для классификации арабских символов, и она достигает точности около 91 % в наборе тестовых данных. Однако когда я подключаю эту модель к своему приложению с графическим интерфейсом...
Я попытался построить CNN с нуля и проверить её на наборе данных Village: она даёт точность 96 % и точность на валидации 93 %, но когда я пытаюсь проверить её на тестовых изображениях, она всегда прогнозирует один и тот же класс ... но с другой стороны,...
Я новичок в ИИ и глубоком обучении, и я обучил CNN для бинарной классификации, используя TensorFlow/Keras, чтобы различать кошек и собак. Однако при оценке на тестовом наборе данных модель предсказывает только «CAT» для каждого...
Я обучил CNN для бинарной классификации, используя TensorFlow/Keras, чтобы различать кошек и собак. Однако при оценке на тестовом наборе данных модель предсказывает только «CAT» для каждого изображения, даже если набор данных содержит оба...