Код: Выделить всё
// Compile-time problem size. Not referenced by the kernel visible here;
// presumably the host passes it as the `ds` argument -- TODO confirm.
constexpr int DSIZE = 4;

// Edge length used to size the kernel's statically allocated shared-memory
// tiles (sharedA[block_size], sharedB[block_size][block_size]).
constexpr int block_size = 2;
/**
 * Element-wise scaling of a ds x ds matrix by a vector:
 *
 *     C[i * ds + j] = A[i] * B[i * ds + j]
 *
 * where i is the global thread index along x and j is the global thread
 * index along y. Operands are staged through shared memory before the
 * multiply.
 *
 * Launch preconditions:
 *   - 2D launch with blockDim.x <= block_size and blockDim.y <= block_size;
 *     the shared tiles are statically sized by block_size and are indexed
 *     directly with threadIdx, so a larger block would write out of bounds.
 *   - The grid must cover at least ds threads along both x and y for every
 *     element of C to be written.
 *   - Assumes A holds at least ds floats and B, C hold at least ds * ds
 *     floats -- confirm against the caller.
 *
 * @param A  input vector (read-only)
 * @param B  input matrix, indexed as B[i * ds + j] (read-only)
 * @param C  output matrix, same indexing as B
 * @param ds number of valid indices along each dimension
 */
__global__ void hdot(const float *A, const float *B, float *C, int ds){
    const int row = threadIdx.x + blockIdx.x * blockDim.x;  // indexes A and the slow axis of B/C
    const int col = threadIdx.y + blockIdx.y * blockDim.y;  // fast axis of B/C

    __shared__ float sharedA[block_size];
    __shared__ float sharedB[block_size][block_size];

    const bool in_range = (row < ds) && (col < ds);

    if (in_range) {
        // Threads sharing threadIdx.x all store the same A[row] value here,
        // so the overlapping writes are benign.
        sharedA[threadIdx.x] = A[row];
        sharedB[threadIdx.y][threadIdx.x] = B[row * ds + col];
    }

    // The barrier must be reached by every thread of the block. Keeping it
    // inside the bounds check is undefined behaviour (and can hang) whenever
    // ds is not a multiple of the block dimensions, so it lives outside.
    __syncthreads();

    if (in_range) {
        C[row * ds + col] = sharedA[threadIdx.x] * sharedB[threadIdx.y][threadIdx.x];
    }
}
Большое спасибо!
Подробнее здесь: https://stackoverflow.com/questions/783 ... rd-product
Мобильная версия