Я новичок в Python и глубоком обучении в целом и пытаюсь выяснить, что не так с моим кодом. Я пытаюсь построить модель, которая понимает язык жестов, в частности ASL, из этого набора данных ASL-Citizen
Я использую предварительно обученную модель MoviNet. из-за вычислительных проблем я пытаюсь научить модель понимать только 200 знаков из набора данных. Вот мой код:
# Required Libraries Import
import numpy as np
import pandas as pd
import os
import cv2
import tensorflow as tf
import random
import pathlib
import matplotlib.pyplot as plt
from tensorflow.keras import layers
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from official.projects.movinet.modeling import movinet, movinet_model
# Load Data and Basic Setup
# Load the training, validation, and test files
trainFile = pd.read_csv('/content/ASL_Citizen/ASL_Citizen/splits/train.csv')
valFile = pd.read_csv('/content/ASL_Citizen/ASL_Citizen/splits/val.csv')
testFile = pd.read_csv('/content/ASL_Citizen/ASL_Citizen/splits/test.csv')
# Set limit for the number of classes
MAX_CLASSES = 200 # Keeping this to limit to 200 classes, can be removed if you want all classes.
# Map all training, validation, and test files by class
mapOfTrain = {}
mapOfVal = {}
mapOfTest = {}
# Use all videos for each selected class
for idx, word in enumerate(trainFile['Gloss'].unique()[:MAX_CLASSES]): # Limit to first 200 classes
mapOfTrain[word] = trainFile[trainFile['Gloss'] == word]['Video file'].values.tolist()
mapOfVal[word] = valFile[valFile['Gloss'] == word]['Video file'].values.tolist()
mapOfTest[word] = testFile[testFile['Gloss'] == word]['Video file'].values.tolist()
# Function to format frames (resize, pad, normalize)
def format_frames(frame, output_size):
frame = tf.image.convert_image_dtype(frame, tf.float32)
return tf.image.resize_with_pad(frame, *output_size)
# Extract frames from video files
def frames_from_video_file(video_path, n_frames, output_size=(224,224), frame_step=5):
result = []
src = cv2.VideoCapture(str(video_path))
video_length = src.get(cv2.CAP_PROP_FRAME_COUNT)
need_length = 1 + (n_frames - 1) * frame_step
start = 0 if need_length > video_length else random.randint(0, video_length - need_length + 1)
src.set(cv2.CAP_PROP_POS_FRAMES, start)
for _ in range(n_frames):
ret, frame = src.read()
if ret:
result.append(format_frames(frame, output_size))
else:
result.append(np.zeros_like(result[0]))
src.release()
return np.array(result)[..., [2, 1, 0]]
# Limit the number of videos per class to 15
def limit_videos(data_map, max_videos=15):
limited_data_map = {}
for label, video_files in data_map.items():
limited_data_map[label] = video_files[:max_videos] # Limit to max_videos per class
return limited_data_map
# Limit videos in each dataset map
mapOfTrain = limit_videos(mapOfTrain, max_videos=15)
mapOfVal = limit_videos(mapOfVal, max_videos=15)
mapOfTest = limit_videos(mapOfTest, max_videos=15)
# Frame Generator Class without Augmentation
class FrameGenerator:
def __init__(self, data_dict, base_path, n_frames):
self.data_dict = data_dict # Dictionary of {label: list of video file names}
self.base_path = base_path # Base path to the 'videos' folder
self.n_frames = n_frames
self.class_names = sorted(data_dict.keys())
self.class_ids_for_name = {name: idx for idx, name in enumerate(self.class_names)}
def __call__(self):
for label, video_files in self.data_dict.items():
class_id = self.class_ids_for_name[label]
for video_file in video_files:
video_path = os.path.join(self.base_path, video_file)
video_frames = frames_from_video_file(video_path, self.n_frames)
yield video_frames, class_id
# Dataset Creation for Training, Testing, and Validation without augmentation
batch_size = 32
num_frames = 16
output_signature = (tf.TensorSpec(shape=(None, None, None, 3), dtype=tf.float32),
tf.TensorSpec(shape=(), dtype=tf.int16))
videos_path = '/content/ASL_Citizen/ASL_Citizen/videos'
train_ds = tf.data.Dataset.from_generator(
FrameGenerator(mapOfTrain, videos_path, num_frames),
output_signature=output_signature
).batch(batch_size)
val_ds = tf.data.Dataset.from_generator(
FrameGenerator(mapOfVal, videos_path, num_frames),
output_signature=output_signature
).batch(batch_size)
test_ds = tf.data.Dataset.from_generator(
FrameGenerator(mapOfTest, videos_path, num_frames),
output_signature=output_signature
).batch(batch_size)
print("Datasets created with a maximum of 15 videos per class and without augmentation.")
import tensorflow as tf
import tf_keras as tfk
# MoViNet Model Building and Training Setup
model_id = 'a0'
NUM_CLASSES = 200
resolution = 224
num_frames = 8
# Initialize backbone with pre-trained configuration
backbone = movinet.Movinet(model_id=model_id)
backbone.trainable = False # Start with all layers frozen
# Load the pre-trained weights
pretrained_model = movinet_model.MovinetClassifier(backbone=backbone, num_classes=600)
pretrained_model.build([None, None, None, None, 3])
# Download and load pre-trained weights
!wget https://storage.googleapis.com/tf_model_garden/vision/movinet/movinet_a0_base.tar.gz -O movinet_a0_base.tar.gz -q
!tar -xvf movinet_a0_base.tar.gz
checkpoint_dir = f'movinet_{model_id}_base'
checkpoint_path = tf.train.latest_checkpoint(checkpoint_dir)
checkpoint = tf.train.Checkpoint(model=pretrained_model)
status = checkpoint.restore(checkpoint_path)
# Check if weights loaded successfully
if status.assert_existing_objects_matched():
print("All pre-trained weights matched and loaded successfully!")
else:
print("Some pre-trained weights did not match. Check for issues.")
# Transfer Learning Setup: Adjust Model for NUM_CLASSES
model = movinet_model.MovinetClassifier(backbone=backbone, num_classes=NUM_CLASSES)
model.build([32, num_frames, resolution, resolution, 3])
# Compile the Model with a higher learning rate
loss_obj = SparseCategoricalCrossentropy()
optimizer = tfk.optimizers.Adam()
model.compile(loss=loss_obj, optimizer=optimizer, metrics=['accuracy'])
training_results = model.fit(
train_ds,
validation_data=val_ds,
epochs=50,
validation_freq=1,
verbose=1,
)
но у меня по-прежнему низкая точность как при обучении, так и при проверке. Я пытался изменить эпохи, размер пакета, номера кадров и шаг кадров. но каждый раз я получаю очень низкие цифры. вот что я получаю:
movinet_a0_base/
movinet_a0_base/checkpoint
movinet_a0_base/ckpt-1.data-00000-of-00001
movinet_a0_base/ckpt-1.index
WARNING:tensorflow:Detecting that an object or model or tf.train.Checkpoint is being deleted with unrestored values. See the following logs for the specific values in question. To silence these warnings, use `status.expect_partial()`. See https://www.tensorflow.org/api_docs/python/tf/train/Checkpoint#restorefor details about the status object returned by the restore function.
WARNING:tensorflow:Value in checkpoint could not be found in the restored object: (root)._save_counter
All pre-trained weights matched and loaded successfully!
Epoch 1/50
94/94 [==============================] - 594s 6s/step - loss: 16.0641 - accuracy: 0.0000e+00 - val_loss: 16.0391 - val_accuracy: 0.0049
Epoch 2/50
94/94 [==============================] - 563s 6s/step - loss: 16.0371 - accuracy: 0.0050 - val_loss: 16.0391 - val_accuracy: 0.0049
Epoch 3/50
94/94 [==============================] - 558s 6s/step - loss: 16.0371 - accuracy: 0.0050 - val_loss: 16.0391 - val_accuracy: 0.0049
Epoch 4/50
16/94 [====>.........................] - ETA: 6:25 - loss: 15.6459 - accuracy: 0.0293
в чем проблема. Надеюсь, кто-нибудь сможет мне помочь.
Я новичок в Python и глубоком обучении в целом и пытаюсь выяснить, что не так с моим кодом. Я пытаюсь построить модель, которая понимает язык жестов, в частности ASL, из этого набора данных ASL-Citizen [code]https://www.kaggle.com/datasets/abd0kamel/asl-citizen [/code] Я использую предварительно обученную модель MoviNet. из-за вычислительных проблем я пытаюсь научить модель понимать только 200 знаков из набора данных. Вот мой код: [code]# Required Libraries Import import numpy as np import pandas as pd import os import cv2 import tensorflow as tf import random import pathlib import matplotlib.pyplot as plt from tensorflow.keras import layers from tensorflow.keras.optimizers import Adam from tensorflow.keras.losses import SparseCategoricalCrossentropy from official.projects.movinet.modeling import movinet, movinet_model
# Load Data and Basic Setup # Load the training, validation, and test files trainFile = pd.read_csv('/content/ASL_Citizen/ASL_Citizen/splits/train.csv') valFile = pd.read_csv('/content/ASL_Citizen/ASL_Citizen/splits/val.csv') testFile = pd.read_csv('/content/ASL_Citizen/ASL_Citizen/splits/test.csv')
# Set limit for the number of classes MAX_CLASSES = 200 # Keeping this to limit to 200 classes, can be removed if you want all classes.
# Map all training, validation, and test files by class mapOfTrain = {} mapOfVal = {} mapOfTest = {}
# Use all videos for each selected class for idx, word in enumerate(trainFile['Gloss'].unique()[:MAX_CLASSES]): # Limit to first 200 classes mapOfTrain[word] = trainFile[trainFile['Gloss'] == word]['Video file'].values.tolist() mapOfVal[word] = valFile[valFile['Gloss'] == word]['Video file'].values.tolist() mapOfTest[word] = testFile[testFile['Gloss'] == word]['Video file'].values.tolist()
# Function to format frames (resize, pad, normalize) def format_frames(frame, output_size): frame = tf.image.convert_image_dtype(frame, tf.float32) return tf.image.resize_with_pad(frame, *output_size)
for _ in range(n_frames): ret, frame = src.read() if ret: result.append(format_frames(frame, output_size)) else: result.append(np.zeros_like(result[0])) src.release() return np.array(result)[..., [2, 1, 0]]
# Limit the number of videos per class to 15 def limit_videos(data_map, max_videos=15): limited_data_map = {} for label, video_files in data_map.items(): limited_data_map[label] = video_files[:max_videos] # Limit to max_videos per class return limited_data_map
# Limit videos in each dataset map mapOfTrain = limit_videos(mapOfTrain, max_videos=15) mapOfVal = limit_videos(mapOfVal, max_videos=15) mapOfTest = limit_videos(mapOfTest, max_videos=15)
# Frame Generator Class without Augmentation class FrameGenerator: def __init__(self, data_dict, base_path, n_frames): self.data_dict = data_dict # Dictionary of {label: list of video file names} self.base_path = base_path # Base path to the 'videos' folder self.n_frames = n_frames self.class_names = sorted(data_dict.keys()) self.class_ids_for_name = {name: idx for idx, name in enumerate(self.class_names)}
def __call__(self): for label, video_files in self.data_dict.items(): class_id = self.class_ids_for_name[label] for video_file in video_files: video_path = os.path.join(self.base_path, video_file) video_frames = frames_from_video_file(video_path, self.n_frames) yield video_frames, class_id
# Dataset Creation for Training, Testing, and Validation without augmentation batch_size = 32 num_frames = 16 output_signature = (tf.TensorSpec(shape=(None, None, None, 3), dtype=tf.float32), tf.TensorSpec(shape=(), dtype=tf.int16))
# Check if weights loaded successfully if status.assert_existing_objects_matched(): print("All pre-trained weights matched and loaded successfully!") else: print("Some pre-trained weights did not match. Check for issues.")
# Transfer Learning Setup: Adjust Model for NUM_CLASSES model = movinet_model.MovinetClassifier(backbone=backbone, num_classes=NUM_CLASSES) model.build([32, num_frames, resolution, resolution, 3])
# Compile the Model with a higher learning rate loss_obj = SparseCategoricalCrossentropy() optimizer = tfk.optimizers.Adam() model.compile(loss=loss_obj, optimizer=optimizer, metrics=['accuracy'])
training_results = model.fit( train_ds, validation_data=val_ds, epochs=50, validation_freq=1, verbose=1, ) [/code] но у меня по-прежнему низкая точность как при обучении, так и при проверке. Я пытался изменить эпохи, размер пакета, номера кадров и шаг кадров. но каждый раз я получаю очень низкие цифры. вот что я получаю: [code]movinet_a0_base/ movinet_a0_base/checkpoint movinet_a0_base/ckpt-1.data-00000-of-00001 movinet_a0_base/ckpt-1.index WARNING:tensorflow:Detecting that an object or model or tf.train.Checkpoint is being deleted with unrestored values. See the following logs for the specific values in question. To silence these warnings, use `status.expect_partial()`. See https://www.tensorflow.org/api_docs/python/tf/train/Checkpoint#restorefor details about the status object returned by the restore function. WARNING:tensorflow:Value in checkpoint could not be found in the restored object: (root)._save_counter All pre-trained weights matched and loaded successfully! Epoch 1/50 94/94 [==============================] - 594s 6s/step - loss: 16.0641 - accuracy: 0.0000e+00 - val_loss: 16.0391 - val_accuracy: 0.0049 Epoch 2/50 94/94 [==============================] - 563s 6s/step - loss: 16.0371 - accuracy: 0.0050 - val_loss: 16.0391 - val_accuracy: 0.0049 Epoch 3/50 94/94 [==============================] - 558s 6s/step - loss: 16.0371 - accuracy: 0.0050 - val_loss: 16.0391 - val_accuracy: 0.0049 Epoch 4/50 16/94 [====>.........................] - ETA: 6:25 - loss: 15.6459 - accuracy: 0.0293 [/code] в чем проблема. Надеюсь, кто-нибудь сможет мне помочь.
function st_makeenvelope (двойная точность, двойная точность, двойная точность, двойная точность, целое число) не существует
Подсказка: Никакая функция не соответствует данным имени и типам аргументов. Вам может потребоваться добавить явные типы....
Я скопировал пример кода прямо с официального сайта Keras и отредактировал его, чтобы создать модель машинного обучения.
Я использую Google Colab для своего кода.
Ссылка: https ://keras.io/examples/vision/image_classification_from_scratch/
import...
Я создаваю модель Seedencetransformer ('All-Minilm-L6-V2'), используя словарь (JSON) с названием Category_Descriptions в качестве набора данных.
Ниже приведен Структура данных Category_descriptions :
{
CategoryA : {
CategorySearch : Description...
Я создаваю модель Seedencetransformer ('All-Minilm-L6-V2'), используя словарь (JSON) с названием Category_Descriptions в качестве набора данных.
Ниже приведен Структура данных Category_descriptions :
{
CategoryA : {
CategorySearch : Description...
Матрица путаницы показывает, как реальные метки сравниваются с прогнозируемыми метками для задачи двоичной классификации.
Используя матрицу путаницы, вычислите следующее:
Точность: какая часть прогнозов оказалась верной?
Точность: какая доля...