Very low EfficientNetB3 model accuracy and a training plateau on a brain-tumor classification problem


Post by Anonymous »

DATASET link: (https://www.kaggle.com/datasets/denizka ... tumor/data)
Here is the code:
# system-libraries
import os
import shutil
import time
import pathlib
import itertools

# data-handling
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('darkgrid')
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.utils import class_weight

# deep-learning libraries
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.applications import ResNet50, EfficientNetB3
from tensorflow.keras.optimizers import Adam, Adamax
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Activation, Dropout, BatchNormalization, GlobalAveragePooling2D
from tensorflow.keras.regularizers import l1, l2
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras import regularizers

# ignore-warnings
import warnings
warnings.filterwarnings("ignore")

print(f"Modules Loaded")

# generate file paths and labels from the numbered class folders
def define_paths(data_dir):
    filepath = []
    label = []

    folds = os.listdir(data_dir)
    for fold in folds:
        files = f"{data_dir}/{fold}"
        imgs = os.listdir(files)
        for img in imgs:
            path = f"{files}/{img}"
            filepath.append(path)
            if fold == '1':
                label.append('meningioma')
            elif fold == '2':
                label.append('glioma')
            elif fold == '3':
                label.append('pituitary tumor')
    return filepath, label

# create a dataframe of file paths and labels
def define_df(files, classes):
    Fseries = pd.Series(files, name='filepaths')
    Cseries = pd.Series(classes, name='labels')
    return pd.concat([Fseries, Cseries], axis=1)

def create_df(data_dir):
    filepath, label = define_paths(data_dir)
    df = define_df(filepath, label)

    # 80% train, then split the remaining 20% evenly into validation and test
    train_df, dummy_df = train_test_split(df, train_size=0.8, shuffle=True, stratify=df['labels'], random_state=123)
    valid_df, test_df = train_test_split(dummy_df, test_size=0.5, shuffle=True, random_state=123, stratify=dummy_df['labels'])

    return train_df, valid_df, test_df
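# (Illustrative addition, not part of the original post: a quick sanity check that
#  the stratified 80/10/10 split keeps the class proportions; "./archive" matches
#  the data_dir used further below.)
tr_df, va_df, te_df = create_df("./archive")
for name, part in [('train', tr_df), ('valid', va_df), ('test', te_df)]:
    print(name, len(part), dict(part['labels'].value_counts(normalize=True).round(3)))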

# convert images to batched tensors via ImageDataGenerator
def create_gens(train_df, valid_df, test_df, batch_size):

    # model parameters
    image_size = (320, 320)
    channel = 3
    color = 'rgb'
    image_shape = (image_size[0], image_size[1], channel)

    ts_length = len(test_df)
    # largest divisor of the test-set length that keeps the batch size <= 80
    # (the post was truncated here by the forum software; everything up to
    #  plot_labels is reconstructed along the flow_from_dataframe pattern
    #  implied by the rest of the script)
    test_batch_size = max(sorted([ts_length // n for n in range(1, ts_length + 1)
                                  if ts_length % n == 0 and ts_length / n <= 80]))

    tr_gen = ImageDataGenerator()  # no rescaling: EfficientNet expects raw 0-255 pixels
    ts_gen = ImageDataGenerator()

    train_gen = tr_gen.flow_from_dataframe(train_df, x_col='filepaths', y_col='labels', target_size=image_size,
                                           class_mode='categorical', color_mode=color, shuffle=True, batch_size=batch_size)
    valid_gen = ts_gen.flow_from_dataframe(valid_df, x_col='filepaths', y_col='labels', target_size=image_size,
                                           class_mode='categorical', color_mode=color, shuffle=True, batch_size=batch_size)
    test_gen = ts_gen.flow_from_dataframe(test_df, x_col='filepaths', y_col='labels', target_size=image_size,
                                          class_mode='categorical', color_mode=color, shuffle=False, batch_size=test_batch_size)
    return train_gen, valid_gen, test_gen

# display a sample batch of training images with their class names
def show_img(gen):
    classes = list(gen.class_indices.keys())
    images, labels = next(gen)
    plt.figure(figsize=(20, 20))
    for i in range(min(len(images), 25)):
        plt.subplot(5, 5, i + 1)
        plt.imshow(images[i] / 255.0)  # scale raw pixels to [0, 1] for display only
        plt.title(classes[np.argmax(labels[i])], color='blue', fontsize=14)
        plt.axis('off')
    plt.show()

# bar chart of per-class image counts
def plot_labels(lcount, labels, values, plot_title):
    plt.figure(figsize=(max(lcount, 8), 5))
    plt.bar(labels, values)
    plt.title(plot_title)
    rotation = 'vertical' if lcount >= 8 else 'horizontal'
    for i in range(lcount):
        plt.text(i, values[i] / 2, str(values[i]), fontsize=12, rotation=rotation, color='yellow', ha='center')
    plt.show()

def plot_label_count(df, plot_title):
    v_counts = df['labels'].value_counts()
    labels = v_counts.keys().to_list()
    values = v_counts.to_list()
    l_count = len(labels)

    if l_count > 55:
        print("The number of labels is greater than 55, so no plot will be produced.")
    else:
        plot_labels(l_count, labels, values, plot_title)

# custom callback: reduces the learning rate on plateaus, halts training after
# repeated non-improvement, and can periodically ask the user whether to continue
class MyCallBack(keras.callbacks.Callback):
    def __init__(self, model, patience, stop_patience, threshold, factor, batches, epochs, ask_epoch):
        super(MyCallBack, self).__init__()
        self.patience = patience            # epochs without improvement before the learning rate is reduced
        self.stop_patience = stop_patience  # how many lr reductions without improvement before training stops
        self.epochs = epochs
        self.threshold = threshold          # training-accuracy threshold; above it, validation loss is monitored instead
        self.factor = float(factor)         # factor by which to reduce the learning rate
        self.batches = batches              # number of training batches per epoch
        self.ask_epoch = ask_epoch
        self.ask_epoch_initial = ask_epoch  # saved so it can be restored if training restarts

        # callback state
        self.count = 0        # epochs since the last improvement
        self.stop_count = 0   # lr reductions without improvement
        self.best_epoch = 0   # epoch with the lowest validation loss
        self.initial_lr = float(tf.keras.backend.get_value(model.optimizer.learning_rate))  # initial learning rate
        self.highest_tracc = 0.0     # highest training accuracy so far
        self.lowest_vloss = np.inf   # lowest validation loss so far
        self.best_weights = None     # best weights seen during training
        self.initial_weights = None  # initial weights, in case they must be restored

    # run when training begins
    def on_train_begin(self, logs=None):
        self.best_weights = self.model.get_weights()
        self.initial_weights = self.model.get_weights()
        print("Do you want to be asked periodically whether to halt training [y/n]?")
        ans = input('')

        if ans in ['Y', 'y']:
            self.ask_permission = 1
        else:
            self.ask_permission = 0

        msg = '{0:^8s}{1:^10s}{2:^9s}{3:^9s}{4:^9s}{5:^9s}{6:^9s}{7:^10s}{8:^10s}{9:^8s}'.format(
            'Epoch', 'Loss', 'Accuracy', 'V_loss', 'V_acc', 'LR', 'Next LR', 'Monitor', '% Improv', 'Duration')
        print(msg)
        self.start_time = time.time()

    def on_train_end(self, logs=None):
        stop_time = time.time()
        tr_duration = stop_time - self.start_time
        hours = tr_duration // 3600
        minutes = (tr_duration - (hours * 3600)) // 60
        seconds = tr_duration - (hours * 3600) - (minutes * 60)

        msg = f"Training time elapsed was {int(np.floor(hours))} hrs::{int(np.floor(minutes))} mins::{int(np.floor(seconds))} sec"
        print(msg)
        self.model.set_weights(self.best_weights)  # restore the best weights seen during training

    def on_train_batch_end(self, batch, logs=None):
        acc = logs.get('accuracy') * 100
        loss = logs.get('loss')
        msg = '{0:20s}processing batch {1:} of {2:5s}- accuracy= {3:5.3f} - loss: {4:8.5f}'.format(
            ' ', str(batch), str(self.batches), acc, loss)
        print(msg, '\r', end='')

    def on_epoch_begin(self, epoch, logs=None):
        self.ep_start = time.time()

    def on_epoch_end(self, epoch, logs=None):
        ep_end = time.time()
        duration = ep_end - self.ep_start

        lr = float(tf.keras.backend.get_value(self.model.optimizer.learning_rate))
        current_lr = lr
        acc = logs.get('accuracy')
        v_acc = logs.get('val_accuracy')
        loss = logs.get('loss')
        v_loss = logs.get('val_loss')

        if acc < self.threshold:
            # below the accuracy threshold: monitor training accuracy
            monitor = 'accuracy'
            if epoch == 0:
                pimprov = 0.0
            else:
                pimprov = (acc - self.highest_tracc) * 100 / self.highest_tracc  # improvement in accuracy

            if acc > self.highest_tracc:
                self.highest_tracc = acc
                self.best_weights = self.model.get_weights()
                self.count = 0
                self.stop_count = 0
                if v_loss < self.lowest_vloss:
                    self.lowest_vloss = v_loss
                self.best_epoch = epoch + 1
            else:
                if self.count >= self.patience:
                    lr = lr * self.factor
                    self.model.optimizer.learning_rate.assign(lr)
                    self.count = 0
                    self.stop_count += 1
                else:
                    self.count += 1
                if v_loss < self.lowest_vloss:
                    self.lowest_vloss = v_loss
        else:
            # above the accuracy threshold: monitor validation loss
            monitor = 'val_loss'
            if epoch == 0:
                pimprov = 0.0
            else:
                pimprov = (self.lowest_vloss - v_loss) * 100 / self.lowest_vloss  # improvement in validation loss

            if v_loss < self.lowest_vloss:
                self.lowest_vloss = v_loss
                self.best_weights = self.model.get_weights()
                self.count = 0
                self.stop_count = 0
                if acc > self.highest_tracc:
                    self.highest_tracc = acc
                self.best_epoch = epoch + 1
            else:
                if self.count >= self.patience:
                    lr = lr * self.factor
                    self.model.optimizer.learning_rate.assign(lr)
                    self.count = 0
                    self.stop_count += 1
                else:
                    self.count += 1
                if acc > self.highest_tracc:
                    self.highest_tracc = acc

        msg = f"{str(epoch + 1):^3s}/{str(self.epochs):4s} {loss:^9.3f}{acc * 100:^9.3f}{v_loss:^9.5f}{v_acc * 100:^9.3f}{current_lr:^9.5f}{lr:^9.5f}{monitor:^11s}{pimprov:^10.2f}{duration:^8.2f}"
        print(msg)

        if self.stop_count > self.stop_patience - 1:
            print(f"Training has been halted at epoch {epoch + 1} after {self.stop_patience} learning-rate adjustments with no improvement")
            self.model.stop_training = True
        else:
            if self.ask_epoch is not None and self.ask_permission != 0:
                if epoch + 1 >= self.ask_epoch:
                    print("enter 'H' to halt training, or an integer for the number of additional epochs to run before asking again")
                    ans = input('')
                    if ans in ['H', 'h']:
                        print(f"Training has been halted at epoch {epoch + 1} due to user input.")
                        self.model.stop_training = True
                    else:
                        try:
                            ans = int(ans)
                            self.ask_epoch += ans
                            print(f"Training will continue until epoch {str(self.ask_epoch)}")
                            msg = '{0:^8s}{1:^10s}{2:^9s}{3:^9s}{4:^9s}{5:^9s}{6:^9s}{7:^10s}{8:10s}{9:^8s}'.format(
                                'Epoch', 'Loss', 'Accuracy', 'V_loss', 'V_acc', 'LR', 'Next LR', 'Monitor', '% Improv', 'Duration')
                            print(msg)
                        except Exception:
                            print('Invalid input')
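# (Added for comparison, not part of the original post: most of what MyCallBack
#  does can be approximated with the built-in Keras callbacks, which is useful
#  when debugging a plateau because it removes one custom moving part.)
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping

builtin_callbacks = [
    ReduceLROnPlateau(monitor='val_loss', factor=0.8, patience=1, verbose=1),
    EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True, verbose=1),
]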

# plot the training history of accuracy and loss, marking the best epoch in each
def plot_training(hist):
    tr_acc = hist.history['accuracy']
    tr_loss = hist.history['loss']
    val_acc = hist.history['val_accuracy']
    val_loss = hist.history['val_loss']
    index_loss = np.argmin(val_loss)
    val_lowest = val_loss[index_loss]
    index_acc = np.argmax(val_acc)
    acc_highest = val_acc[index_acc]
    Epochs = [i + 1 for i in range(len(tr_acc))]
    loss_label = f"best epoch = {str(index_loss + 1)}"
    acc_label = f"best epoch = {str(index_acc + 1)}"

    # Plot training history
    plt.figure(figsize=(20, 8))
    plt.style.use('fivethirtyeight')

    plt.subplot(1, 2, 1)
    plt.plot(Epochs, tr_loss, 'r', label='Training Loss')
    plt.plot(Epochs, val_loss, 'g', label='Validation Loss')
    plt.scatter(index_loss + 1, val_lowest, s=150, c='blue', label=loss_label)
    plt.title('Training and Validation Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()

    plt.subplot(1, 2, 2)
    plt.plot(Epochs, tr_acc, 'r', label='Training Accuracy')
    plt.plot(Epochs, val_acc, 'g', label='Validation Accuracy')
    plt.scatter(index_acc + 1, acc_highest, s=150, c='blue', label=acc_label)
    plt.title('Training and Validation Accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()

    plt.tight_layout()  # was missing the parentheses, so it had no effect
    plt.show()

# confusion-matrix plot, adapted from the sklearn documentation example
def plot_confusion_matrix(cm, classes, normalize=False, title='Confusion Matrix', cmap=plt.cm.Blues):
    # normalize first, so the image and the cell labels agree
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized Confusion Matrix")
    else:
        print("Confusion Matrix Without Normalization")
    print(cm)

    plt.figure(figsize=(10, 10))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cm[i, j], horizontalalignment='center', color='white' if cm[i, j] > thresh else 'black')

    plt.tight_layout()
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
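# (Example usage, my addition: because test_gen is built with shuffle=False,
#  test_gen.classes lines up with the prediction order, so the confusion matrix
#  and classification report below are valid.)
def evaluate_on_test(model, test_gen):
    preds = model.predict(test_gen)
    y_pred = np.argmax(preds, axis=1)
    classes = list(test_gen.class_indices.keys())
    cm = confusion_matrix(test_gen.classes, y_pred)
    plot_confusion_matrix(cm, classes)
    print(classification_report(test_gen.classes, y_pred, target_names=classes))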

data_dir = "./archive"
try:
train_df, valid_df, test_df = create_df(data_dir)
batch_size = 40
train_gen, valid_gen, test_gen = create_gens(train_df, valid_df, test_df, batch_size)
print(train_gen, valid_gen, test_gen)

except:
print('Invalid Input')

show_img(train_gen)

plot_label_count(train_df,"Train")

# create model structure
img_size = (320,320)
channels = 3
image_shape = (img_size[0], img_size[1], channels)
class_count = len(list(train_gen.class_indices.keys()))

# using EfficientNetB3 (more accurate than EfficientNetB0 and cheaper than EfficientNetB7) from the EfficientNet family
base_model = EfficientNetB3(include_top=False, weights='imagenet', input_shape=image_shape)
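# Note (added): the tf.keras EfficientNet applications embed their input
# preprocessing (a Rescaling/Normalization layer) inside the model, so the
# generators above must feed raw 0-255 pixels; adding rescale=1./255 to the
# ImageDataGenerator would silently mismatch the pretrained weights.
# A common transfer-learning first phase would be:
# base_model.trainable = False   # train only the new head, then unfreeze to fine-tune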

model = Sequential([
    base_model,
    Conv2D(32, (3, 3), padding='same', activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(rate=0.25),
    Conv2D(64, (3, 3), padding='same', activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(rate=0.25),
    Conv2D(128, (3, 3), padding='same', activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(rate=0.25),
    GlobalAveragePooling2D(),
    Dense(512, kernel_regularizer=l2(0.016), activity_regularizer=l1(0.006), bias_regularizer=l1(0.006), activation='relu'),
    Dropout(rate=0.5),
    Dense(256, kernel_regularizer=l2(0.016), activity_regularizer=l1(0.006), bias_regularizer=l1(0.006), activation='relu'),
    Dropout(rate=0.5),
    Dense(class_count, activation='softmax'),
])

model.compile(Adamax(learning_rate=0.0001), loss= 'categorical_crossentropy', metrics=['accuracy'])
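# (My addition, for illustration: the three classes are imbalanced, so class
#  weights may help; sklearn's class_weight was imported above but never used.)
cls = np.unique(train_df['labels'])
weights = class_weight.compute_class_weight(class_weight='balanced', classes=cls, y=train_df['labels'])
class_weights = {train_gen.class_indices[c]: w for c, w in zip(cls, weights)}
print(class_weights)  # could be passed to model.fit(..., class_weight=class_weights)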
model.summary()

# setting callbacks
batch_size = 40
epochs = 50
patience = 1
stop_patience = 3
threshold = 0.9
factor = 0.8
ask_epoch = 5
batches = int(np.ceil(len(train_gen.labels) / batch_size))  # number of training batches per epoch

logs = "./logs"
os.makedirs(logs, exist_ok=True)
tensorboard_callback = TensorBoard(log_dir=logs, histogram_freq=1)
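# (Added note: once training is running, the curves can be inspected with
#  `tensorboard --logdir ./logs` from a terminal.)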
callbacks = [
    MyCallBack(model=model, patience=patience, stop_patience=stop_patience, threshold=threshold,
               factor=factor, batches=batches, epochs=epochs, ask_epoch=ask_epoch),
    tensorboard_callback,
]

# train model
history = model.fit(x=train_gen, epochs=epochs, verbose=1, callbacks=callbacks, validation_data=valid_gen, validation_steps=None, shuffle=False)

# model performance
plot_training(history)

# model evaluation
ts_length = len(test_df)
test_batch_size = max(sorted([ts_length // n for n in range(1, ts_length + 1)
                              if ts_length % n == 0 and ts_length / n <= 80]))
test_steps = ts_length // test_batch_size

# (the post was cut off mid-expression here; a typical closing step, reconstructed:)
test_score = model.evaluate(test_gen, steps=test_steps, verbose=1)
print("Test Loss: ", test_score[0])
print("Test Accuracy: ", test_score[1])
Very low EfficientNetB3 model accuracy and a low training plateau on this brain-tumor classification problem. I have tried various pre-trained models and added more layers to the model, but the accuracy gets stuck on a plateau (46.552%) after a few epochs, which is very low.
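A plateau at one specific value is often the majority-class frequency, i.e. the network has collapsed to predicting a single class. A quick check against the dataframes built above (an illustrative sketch, not part of the original script):

print(train_df['labels'].value_counts(normalize=True))  # compare the largest share with the 46.552% plateau
print(test_df['labels'].value_counts(normalize=True))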

More details here: https://stackoverflow.com/questions/793 ... tifying-br