Map bilinearly sampled features back to voxels


Post by Anonymous »

I am currently working on a project in which I am developing a class specifically for bilinear sampling. My goal is to map the features extracted during the bilinear sampling step to their corresponding locations in the voxel grid that the class creates.
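For reference, a minimal self-contained snippet showing the torch.nn.functional.grid_sample convention that the class below relies on (the tensor names here are illustrative and not part of the project):

Code: Select all

import torch
import torch.nn.functional as F

# Toy feature map: 1 batch, 1 channel, 4x4, each row equal to [0, 1, 2, 3].
feat = torch.arange(4.0).repeat(4, 1).view(1, 1, 4, 4)

# grid_sample expects normalized coordinates in (x, y) order, each in [-1, 1].
# With align_corners=False, -1/+1 refer to the outer edges of the corner pixels.
xy = torch.tensor([[[[-1.0, -1.0],      # top-left corner
                     [ 0.0,  0.0],      # image centre
                     [ 1.0,  1.0]]]])   # bottom-right corner

out = F.grid_sample(feat, xy, mode='bilinear', align_corners=False)
print(out.shape)  # torch.Size([1, 1, 1, 3]) -> one interpolated value per query point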

Code: Select all

import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F


class BilinearSamplingTransform(nn.Module):
    def __init__(self, model_cfg):
        super().__init__()
        self.model_cfg = model_cfg
        self.norm = self.model_cfg.NORMALIZER
        in_channel = self.model_cfg.IN_CHANNEL
        out_channel = self.model_cfg.OUT_CHANNEL
        self.image_size = self.model_cfg.IMAGE_SIZE
        self.feature_size = self.model_cfg.FEATURE_SIZE
        self.xbound = self.model_cfg.XBOUND
        self.ybound = self.model_cfg.YBOUND
        self.zbound = self.model_cfg.ZBOUND
        self.dbound = self.model_cfg.DBOUND
        downsample = self.model_cfg.DOWNSAMPLE

        self.C = out_channel
        self.grid = self.create_voxel_grid()  # [D, H, W, 3]
        self.D = self.grid.shape[0]

    def create_voxel_grid(self):
        """
        Create a voxel grid based on image size, feature size, and depth.
        Returns:
            voxel_grid (torch.Tensor): A 3D voxel grid containing the voxel coordinates [D, H, W, 3].
        """
        fZ, fX, fY = self.zbound[2], self.xbound[2], self.ybound[2]

        # Width (X) values: linearly spaced positions along xbound covering the feature width
        xs = torch.linspace(self.xbound[0], self.xbound[1], self.xbound[2],
                            dtype=torch.float).view(1, 1, fY).expand(fZ, fX, fY)

        # Height (Y) values: linearly spaced positions along ybound covering the feature height
        ys = torch.linspace(self.ybound[0], self.ybound[1], self.ybound[2],
                            dtype=torch.float).view(1, fX, 1).expand(fZ, fX, fY)

        # Depth (Z) values: linearly spaced positions along zbound
        zs = torch.linspace(self.zbound[0], self.zbound[1], self.zbound[2],
                            dtype=torch.float).view(fZ, 1, 1).expand(fZ, fX, fY)

        # Stack the x, y and z coordinates into a single grid
        voxel_grid = torch.stack((xs, ys, zs), -1)  # Shape: [D, H, W, 3]

        return nn.Parameter(voxel_grid, requires_grad=False)

    def align_grid_with_pt(self, **kwargs):
        points = self.grid
        B, D, W, H = 1, points.size(0), points.size(1), points.size(2)

        # Handle extra rotations (lidar augmentation)
        if "extra_rots" in kwargs:
            extra_rots = kwargs["extra_rots"]
            points = points.view(B, -1, 3)  # Flatten spatial dimensions
            points = torch.bmm(points, extra_rots.transpose(1, 2))  # Batch matrix multiplication
            points = points.view(B, D, W, H, 3)  # Reshape back using the known dimensions

        # Handle extra translations (out of place, so self.grid itself is never modified)
        if "extra_trans" in kwargs:
            extra_trans = kwargs["extra_trans"]
            points = points + extra_trans.view(B, 1, 1, 1, 3)  # Broadcast over spatial dimensions

        # Visualization function here
        return points

    def get_geometry(self, points, camera2lidar_rots, camera2lidar_trans, intrins,
                     post_rots, post_trans, **kwargs):
        camera2lidar_rots = camera2lidar_rots.to(torch.float)
        camera2lidar_trans = camera2lidar_trans.to(torch.float)
        intrins = intrins.to(torch.float)
        post_rots = post_rots.to(torch.float)
        post_trans = post_trans.to(torch.float)

        B, N, _ = camera2lidar_trans.shape

        # cam_to_lidar: undo the perspective division, then apply inverse intrinsics
        # followed by the camera-to-lidar rotation and translation
        points = torch.cat((points[:, :, :, :, :, :2] * points[:, :, :, :, :, 2:3],
                            points[:, :, :, :, :, 2:3]), 5)
        combine = camera2lidar_rots.matmul(torch.inverse(intrins))
        points = combine.view(B, N, 1, 1, 1, 3, 3).matmul(points.unsqueeze(-1)).squeeze(-1)
        points += camera2lidar_trans.view(B, N, 1, 1, 1, 3)

        if "extra_rots" in kwargs:
            extra_rots = kwargs["extra_rots"]
            points = extra_rots.view(B, 1, 1, 1, 1, 3, 3).repeat(1, N, 1, 1, 1, 1, 1) \
                .matmul(points.unsqueeze(-1)).squeeze(-1)

        if "extra_trans" in kwargs:
            extra_trans = kwargs["extra_trans"]
            points += extra_trans.view(B, 1, 1, 1, 1, 3).repeat(1, N, 1, 1, 1, 1)

        return points

    def forward(self, batch_dict):
        img_org = batch_dict['camera_imgs']
        x = batch_dict['image_fpn']
        x = x[0]
        BN, C, H, W = x.size()
        img = x.view(int(BN / 6), 6, C, H, W)  # B, N, C, H, W

        points = batch_dict['points']                        # only for visualisation, not used otherwise
        camera_intrinsics = batch_dict['camera_intrinsics']  # intrinsics
        camera2lidar = batch_dict['camera2lidar']            # extrinsics
        img_aug_matrix = batch_dict['img_aug_matrix']
        lidar_aug_matrix = batch_dict['lidar_aug_matrix']

        lidar2image = batch_dict['lidar2image']              # extrinsics and intrinsics
        lidar2camera = batch_dict['lidar2camera']            # extrinsics only

        intrins = camera_intrinsics[..., :3, :3]
        post_rots = img_aug_matrix[..., :3, :3]
        post_trans = img_aug_matrix[..., :3, 3]
        camera2lidar_rots = camera2lidar[..., :3, :3]
        camera2lidar_trans = camera2lidar[..., :3, 3]
        extra_rots = lidar_aug_matrix[..., :3, :3]
        extra_trans = lidar_aug_matrix[..., :3, 3]

        batch_size = BN // 6
        all_projected_points = []
        TransMode = 'lidar2image'  # or 'lidar2camera'
        N = 6
        plot_project = False
        if plot_project and N == 6:
            fig, axs = plt.subplots(2, 3, figsize=(35, 20))
            # fig2, axs2 = plt.subplots(2, 3, figsize=(35, 20))

        for b in range(batch_size):
            # In depthlss the augmentation is removed first; that is not needed here because
            # the voxel grid is already defined in world (lidar) coordinates.
            D, H, W, _ = self.grid.shape
            cur_img_aug_matrix = img_aug_matrix[b]
            cur_lidar_aug_matrix = lidar_aug_matrix[b]
            cur_lidar2image = lidar2image[b]
            cur_lidar2camera = lidar2camera[b]

            # ----- LiDAR points (kept only for visual comparison) -----
            batch_mask = points[:, 0] == b
            cur_coords = points[batch_mask][:, 1:4]
            cur_coords_before = cur_coords.clone()  # ground truth / start vector

            # ----- Voxel grid transformed into the lidar frame -----
            grid_transformed = self.align_grid_with_pt(extra_rots=extra_rots, extra_trans=extra_trans)
            cur_coords_voxel = grid_transformed.view(D * H * W, 3)  # [M = D*H*W, 3]

            def inverse_aug(cur_coords_c, TransformationMode="lidar2camera"):
                cur_coords_c -= cur_lidar_aug_matrix[:3, 3]
                cur_coords_c = torch.inverse(cur_lidar_aug_matrix[:3, :3]).matmul(
                    cur_coords_c.transpose(1, 0)
                )
                if TransformationMode == "lidar2camera":
                    cur_coords_c = cur_lidar2camera[:, :3, :3].matmul(cur_coords_c)
                    cur_coords_c += cur_lidar2camera[:, :3, 3].reshape(-1, 3, 1)
                elif TransformationMode == "lidar2image":
                    cur_coords_c = cur_lidar2image[:, :3, :3].matmul(cur_coords_c)
                    cur_coords_c += cur_lidar2image[:, :3, 3].reshape(-1, 3, 1)
                return cur_coords_c

            cur_coords_before = inverse_aug(cur_coords_before, "None")  # original points
            # cur_coords = inverse_aug(cur_coords, TransMode)            # point-based transformation
            cur_coords = inverse_aug(cur_coords_voxel, TransMode)        # voxel-based transformation [N, 3, M]

            # visualize_lidar_to_camera(cur_coords_before.permute(1, 0),
            #                           cur_coords[0, :, :].permute(1, 0),
            #                           cur_coords_voxel[0, :, :].permute(1, 0))

            # Keep only points in front of the cameras
            depth = cur_coords[:, 2, :]                                  # Z-coordinates in camera space
            valid_depth_mask = depth >= 0.1                              # [N, M]
            cur_coords = cur_coords[:, :, valid_depth_mask.any(axis=0)]  # [N, 3, M']

            cur_coords[:, 2, :] = torch.clamp(cur_coords[:, 2, :], 1e-5, 1e5)
            cur_coords[:, :2, :] /= cur_coords[:, 2:3, :]

            # Apply the image augmentation
            cur_coords = cur_img_aug_matrix[:, :3, :3].matmul(cur_coords)
            cur_coords += cur_img_aug_matrix[:, :3, 3].reshape(-1, 3, 1)
            cur_coords = cur_coords[:, :2, :].transpose(1, 2)

            # Swap to (row, col) order
            cur_coords = cur_coords[..., [1, 0]]

            camera_view = img_org[b, :, :, :, :].permute(0, 2, 3, 1).cpu().detach().numpy()
            camera_view_tensor = img_org[b, :, :, :, :].permute(0, 2, 3, 1)

            # Filter points that fall outside of the images
            N, H_img, W_img, C = camera_view_tensor.shape
            on_img = (
                (cur_coords[..., 0] < self.image_size[0])
                & (cur_coords[..., 0] >= 0)
                & (cur_coords[..., 1] < self.image_size[1])
                & (cur_coords[..., 1] >= 0)
            )

            # Note: the mask of camera index 1 is applied to all cameras here
            valid_points = cur_coords[:, on_img[1]]

            # Normalize valid points to [-1, 1] for grid_sample
            normalized_points = torch.zeros_like(valid_points)
            normalized_points[:, :, 0] = 2.0 * (valid_points[:, :, 0] / (H_img - 1)) - 1.0  # y-coordinates
            normalized_points[:, :, 1] = 2.0 * (valid_points[:, :, 1] / (W_img - 1)) - 1.0  # x-coordinates, [N, M', 2]

            grid = normalized_points.unsqueeze(1).cuda()  # Shape [N, 1, M', 2]
            features_list = []
            for i in range(0, N):
                img_s = camera_view_tensor[i].unsqueeze(0).permute(0, 3, 1, 2)   # [1, C, H_in, W_in]
                grid_s = grid[i].unsqueeze(0)                                    # [1, 1, M', 2]
                features_points = F.grid_sample(img_s, grid_s, mode='bilinear',
                                                align_corners=False)             # [1, C, 1, M']
                # features_with_location = torch.cat([features_points, grid_s], dim=-1)  # shapes do not match, unused
                features_list.append(features_points)

            features_points = torch.stack(features_list, dim=1)  # [B=1, N=6, C=3, H=1, W=M'] (M' = 22965 in one run)
Goal:
Mapping features to voxels: what steps are needed to place the extracted features at the correct locations in the voxel grid?

More details here: https://stackoverflow.com/questions/790 ... voxel-back
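
One possible direction, sketched below under a few assumptions: the missing piece is keeping, for every column that survives the depth and on-image filtering, the flat index of the voxel it was projected from, and then scattering the grid_sample output back through those indices. The helper name scatter_features_to_voxels and its arguments are hypothetical, and it assumes pixel coordinates are passed in (x, y) order; this is a minimal sketch, not a drop-in part of the class above.

Code: Select all

import torch
import torch.nn.functional as F


def scatter_features_to_voxels(feat_map, pix_coords, voxel_idx, grid_shape):
    """
    feat_map:   [C, H_in, W_in]  feature map of one camera
    pix_coords: [M, 2]           pixel coordinates (x, y) of the kept voxel centres in this camera
    voxel_idx:  [M]              flat index of each kept voxel in the full D*H*W grid (long)
    grid_shape: (D, H, W)        size of the voxel grid
    Returns a [C, D, H, W] volume with the bilinearly sampled features written back.
    """
    C, H_in, W_in = feat_map.shape
    D, H, W = grid_shape

    # Normalise to [-1, 1]; grid_sample expects (x, y) order in the last dimension.
    gx = 2.0 * pix_coords[:, 0] / (W_in - 1) - 1.0
    gy = 2.0 * pix_coords[:, 1] / (H_in - 1) - 1.0
    grid = torch.stack((gx, gy), dim=-1).view(1, 1, -1, 2)            # [1, 1, M, 2]

    sampled = F.grid_sample(feat_map.unsqueeze(0), grid,
                            mode='bilinear', align_corners=False)      # [1, C, 1, M]
    sampled = sampled.view(C, -1)                                       # [C, M]

    # Scatter into the flattened voxel volume; average when several samples hit the same voxel.
    voxel_feats = feat_map.new_zeros(C, D * H * W)
    counts = feat_map.new_zeros(D * H * W)
    voxel_feats.index_add_(1, voxel_idx, sampled)
    counts.index_add_(0, voxel_idx, torch.ones(voxel_idx.numel(), dtype=feat_map.dtype,
                                               device=feat_map.device))
    voxel_feats /= counts.clamp(min=1.0)

    return voxel_feats.view(C, D, H, W)

Inside the forward loop this would mean carrying base_idx = torch.arange(D * H * W, device=cur_coords_voxel.device) alongside cur_coords_voxel, indexing it with the same depth mask and the same per-camera on_img mask that are applied to cur_coords, and finally summing or averaging the per-camera [C, D, H, W] volumes, so that every sampled column keeps track of the voxel it originated from.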