Anonymous
Тонкая настройка (fine-tuning) TrOCR на Mac с чипом M4 (MPS)
Сообщение
Anonymous » 14 янв 2025, 18:42
У меня возникла проблема при переключении устройства на MPS. Обучение проходит без проблем на CPU, но когда я выбираю в качестве устройства MPS, я получаю следующую ошибку:
Код: Выделить всё
RuntimeError: view size is not compatible with input tensor's size and stride (at least one dimension spans across two contiguous subspaces). Use .reshape(...) instead.
Не могли бы вы помочь мне понять, почему возникает эта ошибка и как ее устранить? Большое спасибо за помощь!
Код: Выделить всё
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
from PIL import Image, ImageDraw, ImageFont
import random
import string
def generate_text_image(text, width=384, height=96):
    """Render ``text`` in black, roughly centered, on a white RGB image.

    Args:
        text: String to draw.
        width: Image width in pixels.
        height: Image height in pixels.

    Returns:
        The ``PIL.Image.Image`` of size ``(width, height)`` with the text drawn.
    """
    image = Image.new("RGB", (width, height), color="white")
    draw = ImageDraw.Draw(image)

    # The hard-coded font path may be missing on this machine; fall back to
    # Pillow's built-in font in that case. Only font-loading failures are
    # caught so that e.g. KeyboardInterrupt is no longer swallowed.
    try:
        font = ImageFont.truetype(
            "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", 32
        )
    except (OSError, ImportError):
        font = ImageFont.load_default()

    # textbbox returns (left, top, right, bottom) of the rendered text.
    left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
    x = (width - (right - left)) // 2
    y = (height - (bottom - top)) // 2
    draw.text((x, y), text, fill="black", font=font)
    return image
class OCRDataset(Dataset):
    """Synthetic OCR dataset of random alphanumeric strings rendered as images.

    The ``(image, text)`` pairs are generated eagerly in ``__init__``; the
    processor and tokenizer are applied per item in ``__getitem__``.
    """

    def __init__(self, num_samples=1000, processor=None):
        """Generate ``num_samples`` random text images.

        Args:
            num_samples: Number of ``(image, text)`` pairs to create.
            processor: Callable that turns an image into ``pixel_values`` and
                exposes a ``tokenizer`` attribute (e.g. a ``TrOCRProcessor``).
        """
        self.processor = processor
        self.samples = []
        chars = string.ascii_letters + string.digits
        for _ in range(num_samples):
            text = "".join(random.choices(chars, k=random.randint(5, 10)))
            image = generate_text_image(text, 230, 100)
            self.samples.append((image, text))

    def __len__(self):
        """Return the number of generated samples."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Return the model inputs for sample ``idx``.

        Returns:
            A dict with ``"pixel_values"`` (processor output without its
            leading batch dimension) and ``"labels"`` (token ids padded to
            length 20, with padding positions set to -100).
        """
        image, text = self.samples[idx]
        pixel_values = self.processor(image, return_tensors="pt").pixel_values

        tokenizer = self.processor.tokenizer
        labels = tokenizer(
            text, padding="max_length", max_length=20, return_tensors="pt"
        ).input_ids

        # Padding must not contribute to the loss: -100 is the default
        # ignore_index of the cross-entropy loss. Without this the model is
        # also trained to predict the pad token at every padded position.
        pad_token_id = tokenizer.pad_token_id
        if pad_token_id is not None:
            labels[labels == pad_token_id] = -100

        # squeeze(0) drops only the batch dimension added by return_tensors="pt".
        return {
            "pixel_values": pixel_values.squeeze(0),
            "labels": labels.squeeze(0),
        }
def main():
    """Fine-tune TrOCR on the synthetic OCR dataset and save the result.

    Loads the ``microsoft/trocr-base-handwritten`` processor and model, trains
    for three epochs with AdamW on 1000 generated samples, prints running and
    per-epoch average losses, and writes model and processor to
    ``models/trocr``.
    """
    checkpoint = "microsoft/trocr-base-handwritten"
    output_dir = "models/trocr"

    processor = TrOCRProcessor.from_pretrained(checkpoint, use_fast=True)
    model = VisionEncoderDecoderModel.from_pretrained(checkpoint)

    # Generation/loss settings the encoder-decoder wrapper reads from config.
    model.config.decoder_start_token_id = processor.tokenizer.cls_token_id
    model.config.pad_token_id = processor.tokenizer.pad_token_id
    model.config.vocab_size = model.config.decoder.vocab_size

    train_dataset = OCRDataset(num_samples=1000, processor=processor)
    train_dataloader = DataLoader(train_dataset, batch_size=8, shuffle=True)

    # Prefer Apple's MPS backend, but fall back to CPU instead of crashing on
    # machines where it is not available.
    # NOTE(review): the accompanying post reports a "view size is not
    # compatible with input tensor's size and stride" RuntimeError when
    # training on MPS; nothing here works around that - TODO investigate.
    device = "mps" if torch.backends.mps.is_available() else "cpu"
    print(f"Training on device: {device}")
    model.to(device)

    optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5)
    num_epochs = 3
    num_batches = len(train_dataloader)

    model.train()
    for epoch in range(num_epochs):
        total_loss = 0.0
        print(f"\nStarting Epoch {epoch+1}/{num_epochs}")

        for batch_idx, batch in enumerate(train_dataloader):
            pixel_values = batch["pixel_values"].to(device)
            labels = batch["labels"].to(device)

            # Passing labels makes the model compute the loss itself.
            outputs = model(pixel_values=pixel_values, labels=labels)
            loss = outputs.loss

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

            # Report the running mean loss every 10 batches.
            if (batch_idx + 1) % 10 == 0:
                current_loss = total_loss / (batch_idx + 1)
                print(
                    f"Batch {batch_idx+1}/{num_batches} | "
                    f"Current Loss: {current_loss:.4f}"
                )

        avg_loss = total_loss / num_batches
        print(f"\nEpoch {epoch+1} Summary:")
        print(f"Average Loss: {avg_loss:.4f}")
        print("-" * 50)

    print("\nTraining completed!")
    print(f"Saving model to: {output_dir}")
    model.save_pretrained(output_dir)
    processor.save_pretrained(output_dir)
    print("Model saved successfully!")
# Start training only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Я подтвердил, что MPS доступен на Mac с чипом M4.
Подробнее здесь:
https://stackoverflow.com/questions/793 ... 4-chip-mps
1736869367
Anonymous
У меня возникла проблема при переключении моего устройства на MPS. Мое обучение проходит без проблем на процессоре, но когда я настраиваю устройство на MPS, я получаю следующую ошибку: [code]RuntimeError: view size is not compatible with input tensor's size and stride (at least one dimension spans across two contiguous subspaces). Use .reshape(...) instead.[/code] Не могли бы вы помочь мне понять, почему возникает эта ошибка и как ее устранить? Большое спасибо за помощь! [code]import torch from torch.utils.data import Dataset, DataLoader from transformers import TrOCRProcessor, VisionEncoderDecoderModel from PIL import Image, ImageDraw, ImageFont import random import string def generate_text_image(text, width=384, height=96): image = Image.new("RGB", (width, height), color="white") draw = ImageDraw.Draw(image) try: font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", 32) except: font = ImageFont.load_default() bbox = draw.textbbox((0, 0), text, font=font) text_width = bbox[2] - bbox[0] text_height = bbox[3] - bbox[1] x = (width - text_width) // 2 y = (height - text_height) // 2 draw.text((x, y), text, fill="black", font=font) return image class OCRDataset(Dataset): def __init__(self, num_samples=1000, processor=None): self.processor = processor self.samples = [] chars = string.ascii_letters + string.digits for _ in range(num_samples): text = "".join(random.choices(chars, k=random.randint(5, 10))) image = generate_text_image(text, 230, 100) self.samples.append((image, text)) def __len__(self): return len(self.samples) def __getitem__(self, idx): image, text = self.samples[idx] pixel_values = self.processor(image, return_tensors="pt").pixel_values labels = self.processor.tokenizer( text, padding="max_length", max_length=20, return_tensors="pt" ).input_ids return {"pixel_values": pixel_values.squeeze(), "labels": labels.squeeze()} def main(): processor = TrOCRProcessor.from_pretrained( "microsoft/trocr-base-handwritten", use_fast=True ) model = 
VisionEncoderDecoderModel.from_pretrained( "microsoft/trocr-base-handwritten" ) model.config.decoder_start_token_id = processor.tokenizer.cls_token_id model.config.pad_token_id = processor.tokenizer.pad_token_id model.config.vocab_size = model.config.decoder.vocab_size train_dataset = OCRDataset(num_samples=1000, processor=processor) train_dataloader = DataLoader(train_dataset, batch_size=8, shuffle=True) device = "mps" # or `cpu`` print(f"Training on device: {device}") model.to(device) optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5) num_epochs = 3 model.train() for epoch in range(num_epochs): total_loss = 0 print(f"\nStarting Epoch {epoch+1}/{num_epochs}") for batch_idx, batch in enumerate(train_dataloader): pixel_values = batch["pixel_values"].to(device) labels = batch["labels"].to(device) outputs = model(pixel_values=pixel_values, labels=labels) loss = outputs.loss optimizer.zero_grad() loss.backward() optimizer.step() total_loss += loss.item() if (batch_idx + 1) % 10 == 0: current_loss = total_loss / (batch_idx + 1) print( f"Batch {batch_idx+1}/{len(train_dataloader)} | " f"Current Loss: {current_loss:.4f}" ) avg_loss = total_loss / len(train_dataloader) print(f"\nEpoch {epoch+1} Summary:") print(f"Average Loss: {avg_loss:.4f}") print("-" * 50) print("\nTraining completed!") print(f"Saving model to: models/trocr") model.save_pretrained("models/trocr") processor.save_pretrained("models/trocr") print("Model saved successfully!") if __name__ == "__main__": main() [/code] Я подтвердил, что MPS доступен на Mac с чипом M4. Подробнее здесь: [url]https://stackoverflow.com/questions/79355534/trocr-fine-tuning-in-mac-m4-chip-mps[/url]