Я создал алгоритм для построения входных данных модели:
Код: Выделить всё
import os
import cv2
import numpy as np
import pandas as pd
# --- Configuration ---
video_folder = 'train'       # directory holding the source videos
output_folder = 'train_npy'  # directory that receives the generated .npy files
csv_file = 'train.csv'       # CSV listing the videos and their labels
frames_per_video = 16        # frames sampled from each video (time axis)
pixels_x = 112               # frame dimensions used when resizing
pixels_y = 112

# The script reads the 'video_name' and 'tag' columns of this table.
df = pd.read_csv(csv_file)

# Distinct labels found in the 'tag' column; their count is the number of classes.
unique_tags = df['tag'].unique()
num_categories = len(unique_tags)

# Label -> integer position of that label inside unique_tags.
tag_to_index = dict(zip(unique_tags, range(num_categories)))

# Make sure the destination directory exists before anything is written to it.
os.makedirs(output_folder, exist_ok=True)
def extract_frames(video_path, num_frames=16, size=(pixels_x, pixels_y)):
    """Sample ``num_frames`` evenly spaced frames from a video and resize them.

    Args:
        video_path: Path of the video file handed to ``cv2.VideoCapture``.
        num_frames: Number of frames in the returned array.
        size: Target size passed to ``cv2.resize``, i.e. ``(width, height)``.

    Returns:
        Array of shape ``(num_frames, height, width, 3)``, assuming the decoder
        yields 3-channel frames. If fewer than ``num_frames`` frames can be
        read (short, missing or unreadable video), the remainder is filled
        with all-zero ``uint8`` frames.
    """
    # cv2.resize takes (width, height) but produces arrays shaped
    # (height, width, channels); padding frames must use that same layout and
    # follow the ``size`` argument rather than the module-level defaults.
    width, height = size

    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        # Step between sampled frames; at least 1 so short videos still advance.
        frame_interval = max(total_frames // num_frames, 1)
        for i in range(num_frames):
            cap.set(cv2.CAP_PROP_POS_FRAMES, i * frame_interval)
            ret, frame = cap.read()
            if not ret:
                # No more decodable frames; the rest is zero-padded below.
                break
            frames.append(cv2.resize(frame, size))
    finally:
        # Release the capture handle even if decoding or resizing raised.
        cap.release()

    # Fill with zero frames if necessary so the output length is always fixed.
    while len(frames) < num_frames:
        frames.append(np.zeros((height, width, 3), dtype=np.uint8))

    return np.array(frames)
# Convert every video listed in the CSV into a frame array and a label array.
for idx, row in df.iterrows():
    video_name, class_label = row['video_name'], row['tag']
    video_path = os.path.join(video_folder, video_name)

    # Rows whose video file is missing are reported and skipped.
    if not os.path.exists(video_path):
        print(f"Video {video_name} not found!")
        continue

    # Frames come back channels-last; store them as
    # (frames, channels, rows, columns).
    scene_data = np.transpose(
        extract_frames(video_path, frames_per_video), (0, 3, 1, 2)
    )
    scene_file = os.path.join(output_folder, f'scene_{idx}.npy')
    np.save(scene_file, scene_data)

    # One (1, frames, 1) slice per category; the slice belonging to this
    # video's label is set to 1 for every frame, all others stay 0.
    # NOTE(review): training reportedly fails with target (None, 1, 16) vs.
    # output (None, 16, 1) - confirm this axis order against the model's
    # output layout.
    category_data = np.zeros((num_categories, 1, frames_per_video, 1))
    category_data[tag_to_index[class_label]] = 1
    category_file = os.path.join(output_folder, f'category_{idx}.npy')
    np.save(category_file, category_data)

print("Processing completed!")
При обучении модели на этих данных возникает ошибка:
ValueError: Аргументы `target` и `output` должны иметь одинаковую форму. Получено: target.shape=(None, 1, 16), output.shape=(None, 16, 1)
При этом формы (shape) сохранённых массивов соответствуют руководству:
scene_0.npy.shape = (16, 3, 112, 112)
category_0.npy.shape = (5, 1, 16, 1)
Почему целевые данные (target) и выход модели (output) имеют разную форму? Это ошибка в руководстве или ошибка в формате моих входных данных?
Подробнее здесь: https://stackoverflow.com/questions/792 ... m-tutorial