The code so far:
Code: Select all
from transformers import AutoTokenizer, CLIPTextModelWithProjection
from diffusers import StableDiffusionPipeline
import torch

# CLIP text encoder and tokenizer used to build the prompt embeddings
model = CLIPTextModelWithProjection.from_pretrained("openai/clip-vit-base-patch32")
tokenizer = AutoTokenizer.from_pretrained("openai/clip-vit-base-patch32")

# Stable Diffusion pipeline loaded from a local .safetensors checkpoint
path = "path_to_my_model.safetensors"
pipe = StableDiffusionPipeline.from_single_file(path, torch_dtype=torch.float16, use_safetensors=True, variant="fp16")
pipe.to("cuda")

prompt = "some random prompt"
text_input = tokenizer(prompt, padding="max_length", max_length=tokenizer.model_max_length, truncation=True, return_tensors="pt")
# output[0] of CLIPTextModelWithProjection is the projected (pooled) embedding
text_embeddings = model(text_input.input_ids)[0]

batch_size = len(text_input)  # NB: len() of a BatchEncoding counts its keys, not batch rows
uncond_input = tokenizer(
    [""] * batch_size, padding="max_length", max_length=tokenizer.model_max_length, return_tensors="pt", truncation=True
)
uncond_embeddings = model(uncond_input.input_ids)[0]

# concatenate unconditional and conditional embeddings for classifier-free guidance
text_embeddings = torch.cat([uncond_embeddings, text_embeddings])
output_image = pipe(prompt_embeds=text_embeddings).images[0]
Although the embedding shape seems well-formed (text_embeddings.shape is torch.Size([3, 512])), the pipeline call fails with the "... to unpack" error from the question linked below.
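Where the 3 in [3, 512] comes from can be checked with a quick probe, reusing text_input from the snippet above: len() on a tokenizer's BatchEncoding returns the number of its keys (input_ids, attention_mask), not the batch size.

Code: Select all
# len(text_input) counts the dict keys of the BatchEncoding, so it is 2,
# and the unconditional batch ends up with 2 rows instead of 1
print(len(text_input))                # 2 (input_ids, attention_mask)
print(text_input.input_ids.shape[0])  # 1 - the actual batch size

So uncond_embeddings is [2, 512], and concatenating it with the [1, 512] prompt embedding gives the [3, 512] above.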
I can't figure out what the problem is.
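For reference, here is a minimal sketch (assuming the same pipe as above, with a placeholder prompt) of what the pipeline's own bundled text encoder produces; note that its output has three dimensions, not two:

Code: Select all
import torch

# the pipeline's text encoder is a CLIPTextModel; its output[0] is
# last_hidden_state: one embedding per token, not a single pooled vector
with torch.no_grad():
    ids = pipe.tokenizer(
        "some random prompt",
        padding="max_length",
        max_length=pipe.tokenizer.model_max_length,
        truncation=True,
        return_tensors="pt",
    ).input_ids.to(pipe.device)
    per_token = pipe.text_encoder(ids)[0]

print(per_token.shape)  # e.g. torch.Size([1, 77, 768]) - three dims

As far as I can tell, prompt_embeds is expected to have that three-dimensional [batch, seq_len, hidden] shape, which would explain an unpacking error on a two-dimensional [3, 512] tensor.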
I also tried not concatenating with uncond_embeddings.
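Roughly, that attempt looked like this (same tokenizer, model, and pipe as above):

Code: Select all
# skip classifier-free guidance and pass only the prompt embedding
text_embeddings = model(text_input.input_ids)[0]  # shape [1, 512]
output_image = pipe(prompt_embeds=text_embeddings).images[0]  # fails the same way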
More details here: https://stackoverflow.com/questions/791 ... -to-unpack