We are trying to connect two GPUs located in two different servers via RDMA over InfiniBand. The GPUs are NVIDIA RTX 6000 Ada, and the InfiniBand NICs are NVIDIA ConnectX-6.
Server configuration
Our server has the configuration shown in the image below (terminal command nvidia-smi topo -m); you can see that the connection type between the GPU and the ConnectX NIC is NODE.
Terminal output
Terminal output of nvidia-smi topo -m
According to the web page "NVIDIA configuration | Juniper Networks", this causes poor performance, but because of our server layout it is not possible to move either the GPU or the ConnectX card.
The sender code:
import socket
import cupy as cp
import ctypes
from pyverbs.device import Context
from pyverbs.pd import PD
from pyverbs.cq import CQ
from pyverbs.qp import QPCap, QPInitAttr, QPAttr, QP
from pyverbs.mr import MR
from pyverbs.enums import *
from pyverbs.addr import AHAttr
from pyverbs import device as d
from pyverbs.wr import SGE, SendWR
# Setup RDMA device
lst = d.get_device_list()
ctx = Context(name=lst[0].name.decode())
pd = PD(ctx)
cq = CQ(ctx, 10)
# Allocate GPU buffer with CuPy
gpu_buf = cp.arange(1, 11, dtype=cp.uint8) # [1,2,3,...,10] on GPU
gpu_ptr = gpu_buf.data.ptr
#gpu_ptr = ctypes.c_uint64(gpu_buf.data.ptr).value
# Register GPU memory
mr = MR(creator=pd, length=gpu_buf.nbytes, address=gpu_ptr,
access=IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_ON_DEMAND | IBV_ACCESS_MW_BIND )
# Setup QP
cap = QPCap(max_send_wr=10, max_recv_wr=10, max_send_sge=1, max_recv_sge=1)
init_attr = QPInitAttr(qp_type=IBV_QPT_RC, scq=cq, rcq=cq, cap=cap)
qp = QP(pd, init_attr)
# Exchange info
port_attr = ctx.query_port(1)
lid = port_attr.lid
psn = 0
s = socket.socket()
s.connect(('192.168.2.5', 18515)) # Receiver IP
s.send(f"{lid},{qp.qp_num},{psn},{mr.rkey},{hex(mr.buf)},".encode())
remote_info = s.recv(1024).decode().split(',')
remote_lid, remote_qpn, remote_psn, remote_rkey, remote_addr = int(remote_info[0]), int(remote_info[1]), int(remote_info[2]), int(remote_info[3]), int(remote_info[4], 16)
# QP state transitions
attr = QPAttr()
attr.qp_state = IBV_QPS_INIT
attr.pkey_index = 0
attr.port_num = 1
attr.qp_access_flags = IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_ON_DEMAND | IBV_ACCESS_MW_BIND
qp.modify(attr, IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT | IBV_QP_ACCESS_FLAGS)
attr = QPAttr()
attr.qp_state = IBV_QPS_RTR
attr.path_mtu = IBV_MTU_4096  # enum value 5, i.e. a 4096-byte path MTU
attr.dest_qp_num = remote_qpn
attr.rq_psn = remote_psn
attr.max_dest_rd_atomic = 1
attr.min_rnr_timer = 12
attr.ah_attr = AHAttr(port_num=1, dlid=remote_lid)
qp.modify(attr, IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU |
IBV_QP_DEST_QPN | IBV_QP_RQ_PSN |
IBV_QP_MAX_DEST_RD_ATOMIC | IBV_QP_MIN_RNR_TIMER)
attr = QPAttr()
attr.qp_state = IBV_QPS_RTS
attr.timeout = 14
attr.retry_cnt = 7
attr.rnr_retry = 7
attr.sq_psn = psn
attr.max_rd_atomic = 1
qp.modify(attr, IBV_QP_STATE | IBV_QP_TIMEOUT | IBV_QP_RETRY_CNT |
IBV_QP_RNR_RETRY | IBV_QP_SQ_PSN | IBV_QP_MAX_QP_RD_ATOMIC)
# Post RDMA write
sge = SGE(addr=mr.buf, length=mr.length, lkey=mr.lkey)
wr = SendWR(
wr_id=1,
num_sge=1,
sg=[sge],
opcode=IBV_WR_RDMA_WRITE,
send_flags=IBV_SEND_SIGNALED
)
wr.set_wr_rdma(rkey=remote_rkey, addr=remote_addr)
# pyverbs' QP.post_send() raises an exception on failure rather than returning an error code
try:
    qp.post_send(wr)
    print("RDMA write posted.")
except Exception as e:
    print("Failed to post RDMA write!", e)
# Poll the CQ until the completion for the signaled RDMA write arrives
for _ in range(1000000):
    npolled, wcs = cq.poll(num_entries=1)
    if npolled > 0:
        if wcs[0].status == IBV_WC_SUCCESS:
            print("RDMA write completed successfully.")
        else:
            print("RDMA write failed with status", wcs[0].status)
        break
else:
    print("RDMA write failed or no completion.")
print("Sender GPU buffer sent:", gpu_buf.get()) # Copy to host for printing
s.close()
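We are not yet sure whether the GPU memory registration itself works on our machines. Before running the scripts we use a small sanity check (a minimal sketch; it assumes the GPUDirect RDMA kernel module is named nvidia_peermem, or nv_peer_mem on older stacks):
import cupy as cp
from pyverbs.device import Context
from pyverbs.pd import PD
from pyverbs.mr import MR
from pyverbs.enums import IBV_ACCESS_LOCAL_WRITE, IBV_ACCESS_REMOTE_WRITE, IBV_ACCESS_REMOTE_READ
from pyverbs import device as d
# Check that a GPUDirect RDMA peer-memory module is loaded (assumed names: nvidia_peermem / nv_peer_mem)
with open('/proc/modules') as f:
    mods = f.read()
if 'nvidia_peermem' not in mods and 'nv_peer_mem' not in mods:
    print('Warning: no GPUDirect RDMA peer-memory module appears to be loaded.')
# Try to register a small GPU buffer; if this raises, GPUDirect RDMA registration is the problem
lst = d.get_device_list()
ctx = Context(name=lst[0].name.decode())
pd = PD(ctx)
buf = cp.zeros(4096, dtype=cp.uint8)
mr = MR(creator=pd, length=buf.nbytes, address=buf.data.ptr,
        access=IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ)
print('GPU buffer registered, rkey =', mr.rkey)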
The receiver code follows the same structure:
import socket
import cupy as cp
import ctypes
import time
from pyverbs.device import Context
from pyverbs.pd import PD
from pyverbs.cq import CQ
from pyverbs.qp import QPCap, QPInitAttr, QPAttr, QP
from pyverbs.mr import MR
from pyverbs.enums import *
from pyverbs.addr import AHAttr
from pyverbs import device as d
# Setup RDMA device
lst = d.get_device_list()
ctx = Context(name=lst[0].name.decode())
pd = PD(ctx)
cq = CQ(ctx, 10)
# Allocate GPU buffer
gpu_buf = cp.zeros(10, dtype=cp.uint8)
gpu_ptr = gpu_buf.data.ptr
# Register GPU memory
mr = MR(creator=pd, length=gpu_buf.nbytes, address=gpu_ptr,
access=IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_ON_DEMAND | IBV_ACCESS_MW_BIND)
# Setup QP
cap = QPCap(max_send_wr=10, max_recv_wr=10, max_send_sge=1, max_recv_sge=1)
init_attr = QPInitAttr(qp_type=IBV_QPT_RC, scq=cq, rcq=cq, cap=cap)
qp = QP(pd, init_attr)
# Exchange info
port_attr = ctx.query_port(1)
lid = port_attr.lid
psn = 0
s = socket.socket()
s.bind(('', 18515))
s.listen(1)
conn, _ = s.accept()
remote_info = conn.recv(1024).decode().split(',')
remote_lid, remote_qpn, remote_psn, remote_rkey, remote_addr = int(remote_info[0]), int(remote_info[1]), int(
remote_info[2]), int(remote_info[3]), int(remote_info[4], 16)
conn.send(f"{lid},{qp.qp_num},{psn},{mr.rkey},{hex(mr.buf)},".encode())
# QP state transitions
attr = QPAttr()
attr.qp_state = IBV_QPS_INIT
attr.pkey_index = 0
attr.port_num = 1
attr.qp_access_flags = IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_ON_DEMAND | IBV_ACCESS_MW_BIND
qp.modify(attr, IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT | IBV_QP_ACCESS_FLAGS)
attr = QPAttr()
attr.qp_state = IBV_QPS_RTR
attr.path_mtu = IBV_MTU_4096  # enum value 5, i.e. a 4096-byte path MTU
attr.dest_qp_num = remote_qpn
attr.rq_psn = remote_psn
attr.max_dest_rd_atomic = 1
attr.min_rnr_timer = 12
attr.ah_attr = AHAttr(port_num=1, dlid=remote_lid)
qp.modify(attr, IBV_QP_STATE | IBV_QP_AV | IBV_QP_PATH_MTU |
IBV_QP_DEST_QPN | IBV_QP_RQ_PSN |
IBV_QP_MAX_DEST_RD_ATOMIC | IBV_QP_MIN_RNR_TIMER)
attr = QPAttr()
attr.qp_state = IBV_QPS_RTS
attr.timeout = 14
attr.retry_cnt = 7
attr.rnr_retry = 7
attr.sq_psn = psn
attr.max_rd_atomic = 1
qp.modify(attr, IBV_QP_STATE | IBV_QP_TIMEOUT | IBV_QP_RETRY_CNT |
IBV_QP_RNR_RETRY | IBV_QP_SQ_PSN | IBV_QP_MAX_QP_RD_ATOMIC)
print('Receiver ready. Waiting for RDMA write...')
time.sleep(5)  # a plain RDMA WRITE generates no completion on the target side, so just wait for the sender
print('Receiver GPU buffer after RDMA write:', gpu_buf.get()) # Copy to host for printing
s.close()
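A plain RDMA WRITE does not consume a receive request or generate a completion on the target side, which is why the receiver above simply sleeps before printing the buffer. A variant we have been considering (only a sketch, reusing the variables from the scripts above and assuming write-with-immediate works the same way through pyverbs as through the C verbs API) is to use IBV_WR_RDMA_WRITE_WITH_IMM so the receiver can poll its CQ instead of sleeping:
# Sender side: same write as above, but the opcode also delivers a completion to the receiver
from pyverbs.wr import SGE, SendWR
sge = SGE(addr=mr.buf, length=mr.length, lkey=mr.lkey)
wr = SendWR(wr_id=1, num_sge=1, sg=[sge],
            opcode=IBV_WR_RDMA_WRITE_WITH_IMM,  # RDMA write that also consumes a recv WR remotely
            send_flags=IBV_SEND_SIGNALED)
wr.set_wr_rdma(rkey=remote_rkey, addr=remote_addr)
qp.post_send(wr)
# Receiver side: post a receive before the sender writes, then poll instead of sleeping
from pyverbs.wr import RecvWR
qp.post_recv(RecvWR(wr_id=2, num_sge=0))  # no SGE needed, the payload lands in the registered MR
while True:
    npolled, wcs = cq.poll(num_entries=1)
    if npolled > 0 and wcs[0].status == IBV_WC_SUCCESS:
        print('Receiver GPU buffer after RDMA write:', gpu_buf.get())
        break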
With the scripts above, however, we cannot see any change in the data on the receiver side. Is it possible to establish communication between the two GPUs despite the NODE connection type? We want to avoid going through the CPU and achieve lower latency.
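For comparison, the CPU-staged fallback we refer to would look roughly like this (a sketch only: a plain host buffer would be registered for RDMA instead of the GPU pointer, at the cost of two extra copies per transfer):
import numpy as np
# Host staging buffer; its address (host_buf.ctypes.data) would be registered with MR() instead of gpu_buf.data.ptr
host_buf = np.zeros(10, dtype=np.uint8)
# Sender side: copy GPU -> host before posting the RDMA write from host memory
host_buf[:] = gpu_buf.get()
# Receiver side: copy host -> GPU after the RDMA write has landed in host memory
gpu_buf.set(host_buf)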