Реализация diffPool в PyTorch для неконтролируемой кластеризации однородного графа

Реализация diffPool в PyTorch для неконтролируемой кластеризации однородного графа ⇐ Python

Ответить

1 сообщение • Страница 1 из 1

Anonymous

Реализация diffPool в PyTorch для неконтролируемой кластеризации однородного графа

Цитата

Сообщение Anonymous » 18 ноя 2024, 07:26

Я пытаюсь реализовать неконтролируемую многоуровневую кластеризацию на основе подхода difpool.

Код: Выделить всё

import torch
import torch.nn.functional as F
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv, DenseGCNConv, dense_diff_pool
from torch_geometric.utils import to_dense_adj
import networkx as nx
import matplotlib.pyplot as plt

# --- Step 1: Generate a Random Graph Using NetworkX ---
num_nodes = 100  # Number of nodes in the graph
num_edges = 300  # Number of edges to ensure sufficient connectivity

# Create a random graph using NetworkX
G = nx.gnm_random_graph(num_nodes, num_edges)

# Extract edge index from NetworkX graph
edge_index = torch.tensor(list(G.edges)).t().contiguous()

# Generate random node features
node_features = torch.rand((num_nodes, 16))  # 16-dimensional node features

# Create a torch_geometric Data object
data_homogeneous = Data(x=node_features, edge_index=edge_index)

# --- Visualize the Graph Structure ---
plt.figure(figsize=(8, 6))
nx.draw(G, with_labels=True, node_color='lightblue', node_size=500)
plt.title("Random Graph Structure")
plt.show()

# --- Step 2: Implement Multi-layer DiffPool ---
class MultiLayerDiffPool(torch.nn.Module):
def __init__(self, input_dim, hidden_dim, num_pool_layers, initial_num_nodes):
super(MultiLayerDiffPool, self).__init__()
self.num_pool_layers = num_pool_layers
self.projection = torch.nn.Linear(input_dim, hidden_dim)
self.gcn_layers = torch.nn.ModuleList()
self.dense_gcn_layers = torch.nn.ModuleList()
self.pool_assignments = torch.nn.ModuleList()

# Initialize the number of clusters conservatively
current_num_nodes = initial_num_nodes
for i in range(num_pool_layers):
# Reduce nodes gradually: Use a conservative reduction rate
num_clusters = max(8, int(current_num_nodes * 0.8))
# num_clusters = min(current_num_nodes, num_clusters)  # Ensure clusters do not exceed current nodes
current_num_nodes = num_clusters

self.gcn_layers.append(GCNConv(hidden_dim, hidden_dim))
self.dense_gcn_layers.append(DenseGCNConv(hidden_dim, hidden_dim))
self.pool_assignments.append(torch.nn.Linear(hidden_dim, num_clusters))

# Print shapes for debugging
print(f"Layer {i + 1} - x shape: {x.shape}, edge_index shape: {edge_index.shape}")

S = F.softmax(pool_assign(x), dim=-1)
layer_assignments.append(S.detach().cpu().numpy())

def forward(self, x, edge_index):
x = F.relu(self.projection(x))
batch = torch.zeros(x.size(0), dtype=torch.long, device=x.device)
adj_dense = to_dense_adj(edge_index, max_num_nodes=x.size(0))[0]

layer_assignments = []

for i, (gcn, dense_gcn, pool_assign) in enumerate(
zip(self.gcn_layers, self.dense_gcn_layers, self.pool_assignments)
):
x = F.relu(gcn(x, edge_index))
S = F.softmax(pool_assign(x), dim=-1)
layer_assignments.append(S.detach().cpu().numpy())

# Print shapes for debugging
print(f"Layer {i + 1} - x shape: {x.shape}, S shape: {S.shape}")

if S.size(1) >  x.size(0):
raise ValueError("Number of clusters in S cannot exceed the number of nodes.")

# Perform pooling and update x and adj_dense
x, adj_dense, _, _ = dense_diff_pool(x, adj_dense, S, batch)

# Ensure that x is reduced appropriately
x = x.mean(dim=1)
return x, layer_assignments

# --- Revised Training Loop ---
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = MultiLayerDiffPool(
input_dim=node_features.size(1), hidden_dim=32, num_pool_layers=3, initial_num_nodes=num_nodes
).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
data_homogeneous = data_homogeneous.to(device)

# Use the initial node features for unsupervised reconstruction
original_features = data_homogeneous.x.clone().to(device)

model.train()
for epoch in range(100):
optimizer.zero_grad()
x, layer_assignments = model(data_homogeneous.x, data_homogeneous.edge_index)

# Use the reconstructed x as the features for loss calculation
reconstructed_features = x
loss = F.mse_loss(reconstructed_features, original_features)

loss.backward()
optimizer.step()
print(f"Epoch {epoch + 1}, Loss: {loss.item()}")

print("Training complete.")

# --- Step 4: Plot Layer Assignments ---
for layer_idx, S in enumerate(layer_assignments):
print(f"Shape of S for layer {layer_idx + 1}: {S.shape}")

if len(S.shape) == 3 and S.shape[0] == 1:
S = S.squeeze(0)

plt.figure(figsize=(8, 6))
plt.imshow(S, cmap='viridis', aspect='auto')
plt.colorbar(label='Assignment Probability')
plt.title(f"Layer {layer_idx + 1} Node Assignments")
plt.xlabel("Clusters")
plt.ylabel("Nodes")
plt.show()

# --- Step 5: Plot Graph Per Layer ---
for layer_idx, S in enumerate(layer_assignments):
print(f"Shape of S for layer {layer_idx + 1}: {S.shape}")

if len(S.shape) == 3 and S.shape[0] == 1:
S = S.squeeze(0)

cluster_assignments = S.argmax(axis=1)
G_layer = nx.Graph()
G_layer.add_edges_from(data_homogeneous.edge_index.t().tolist())

colors = [cluster_assignments[node] for node in range(data_homogeneous.num_nodes)]

plt.figure(figsize=(10, 8))
nx.draw(
G_layer,
node_color=colors,
with_labels=True,
cmap='viridis',
node_size=300,
edge_color='gray'
)
plt.title(f"Graph Visualization at Layer {layer_idx + 1}")
plt.show()

Вот распечатанный файл из приведенного выше сценария:

Слой 1 — форма x: torch.Size([100 , 32]), S shape: torch.Size([100,
70])

Но возвращает следующую ошибку.

{ "name": "RuntimeError", "message": "индекс 87 выходит за пределы
измерения 0 с размером 70", "stack":
"---- -------------------------------------------------- --------------------- RuntimeError Traceback (последний вызов
последний) Ячейка In[88], строка 94
92 для эпохи в диапазоне (100):
93Optimizer.zero_grad()
---> 94 x, Layer_assignments = model(data_homogeneous.x, data_homogeneous.edge_index)
96 # Использовать реконструированный x в качестве признаков для расчета потерь
97 восстановленных_функций = x
...
Файл
d:\test\.venv\lib\site-packages\torch_geometric\utils\scatter.py:75,
in scatter(src, index, dim, dim_size, уменьшить)
73, если уменьшить == 'сумма' или уменьшить == 'добавить':
74 index = Broadcast(index, src, dim)
---> 75 return src.new_zeros(size).scatter_add(dim, index, src)
77 if уменьшить == 'mean':
78 count = src.new_zeros(dim_size)Ошибка выполнения: индекс 87 выходит за пределы измерения 0 с размером 70"
}

Похоже, что количество кластеров (узлов в пуле) в матрице назначения S превышает количество доступных узлов на следующем уровне. слой 1 - x shape: torch.Size([100, 32]), S shape: torch.Size([100, 20]) означает, что у нас есть 100 узлов с 32 объектами в x, тогда как матрица назначения S сокращает эти 100 узлов на 20 кластеров.
Я пробовал разные настройки, например, меняя поправочный коэффициент num_clusters = max(8, int(current_num_nodes * 0.8)) на разные значения, но это не сработает. Кажется, мне не хватает некоторых основ, но я не знаю, как их найти.
Настоятельно рекомендуем ваши идеи.

Подробнее здесь: https://stackoverflow.com/questions/791 ... geneous-gr

1731903984

Anonymous

Я пытаюсь реализовать неконтролируемую многоуровневую кластеризацию на основе подхода [b]difpool[/b].
[code]import torch
import torch.nn.functional as F
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv, DenseGCNConv, dense_diff_pool
from torch_geometric.utils import to_dense_adj
import networkx as nx
import matplotlib.pyplot as plt

# --- Step 1: Generate a Random Graph Using NetworkX ---
num_nodes = 100  # Number of nodes in the graph
num_edges = 300  # Number of edges to ensure sufficient connectivity

# Create a random graph using NetworkX
G = nx.gnm_random_graph(num_nodes, num_edges)

# Extract edge index from NetworkX graph
edge_index = torch.tensor(list(G.edges)).t().contiguous()

# Generate random node features
node_features = torch.rand((num_nodes, 16))  # 16-dimensional node features

# Create a torch_geometric Data object
data_homogeneous = Data(x=node_features, edge_index=edge_index)

# --- Visualize the Graph Structure ---
plt.figure(figsize=(8, 6))
nx.draw(G, with_labels=True, node_color='lightblue', node_size=500)
plt.title("Random Graph Structure")
plt.show()

# --- Step 2: Implement Multi-layer DiffPool ---
class MultiLayerDiffPool(torch.nn.Module):
def __init__(self, input_dim, hidden_dim, num_pool_layers, initial_num_nodes):
super(MultiLayerDiffPool, self).__init__()
self.num_pool_layers = num_pool_layers
self.projection = torch.nn.Linear(input_dim, hidden_dim)
self.gcn_layers = torch.nn.ModuleList()
self.dense_gcn_layers = torch.nn.ModuleList()
self.pool_assignments = torch.nn.ModuleList()

# Initialize the number of clusters conservatively
current_num_nodes = initial_num_nodes
for i in range(num_pool_layers):
# Reduce nodes gradually: Use a conservative reduction rate
num_clusters = max(8, int(current_num_nodes * 0.8))
# num_clusters = min(current_num_nodes, num_clusters)  # Ensure clusters do not exceed current nodes
current_num_nodes = num_clusters

self.gcn_layers.append(GCNConv(hidden_dim, hidden_dim))
self.dense_gcn_layers.append(DenseGCNConv(hidden_dim, hidden_dim))
self.pool_assignments.append(torch.nn.Linear(hidden_dim, num_clusters))

# Print shapes for debugging
print(f"Layer {i + 1} - x shape: {x.shape}, edge_index shape: {edge_index.shape}")

S = F.softmax(pool_assign(x), dim=-1)
layer_assignments.append(S.detach().cpu().numpy())

def forward(self, x, edge_index):
x = F.relu(self.projection(x))
batch = torch.zeros(x.size(0), dtype=torch.long, device=x.device)
adj_dense = to_dense_adj(edge_index, max_num_nodes=x.size(0))[0]

layer_assignments = []

for i, (gcn, dense_gcn, pool_assign) in enumerate(
zip(self.gcn_layers, self.dense_gcn_layers, self.pool_assignments)
):
x = F.relu(gcn(x, edge_index))
S = F.softmax(pool_assign(x), dim=-1)
layer_assignments.append(S.detach().cpu().numpy())

# Print shapes for debugging
print(f"Layer {i + 1} - x shape: {x.shape}, S shape: {S.shape}")

if S.size(1) >  x.size(0):
raise ValueError("Number of clusters in S cannot exceed the number of nodes.")

# Perform pooling and update x and adj_dense
x, adj_dense, _, _ = dense_diff_pool(x, adj_dense, S, batch)

# Ensure that x is reduced appropriately
x = x.mean(dim=1)
return x, layer_assignments

# --- Revised Training Loop ---
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = MultiLayerDiffPool(
input_dim=node_features.size(1), hidden_dim=32, num_pool_layers=3, initial_num_nodes=num_nodes
).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
data_homogeneous = data_homogeneous.to(device)

# Use the initial node features for unsupervised reconstruction
original_features = data_homogeneous.x.clone().to(device)

model.train()
for epoch in range(100):
optimizer.zero_grad()
x, layer_assignments = model(data_homogeneous.x, data_homogeneous.edge_index)

# Use the reconstructed x as the features for loss calculation
reconstructed_features = x
loss = F.mse_loss(reconstructed_features, original_features)

loss.backward()
optimizer.step()
print(f"Epoch {epoch + 1}, Loss: {loss.item()}")

print("Training complete.")

# --- Step 4: Plot Layer Assignments ---
for layer_idx, S in enumerate(layer_assignments):
print(f"Shape of S for layer {layer_idx + 1}: {S.shape}")

if len(S.shape) == 3 and S.shape[0] == 1:
S = S.squeeze(0)

plt.figure(figsize=(8, 6))
plt.imshow(S, cmap='viridis', aspect='auto')
plt.colorbar(label='Assignment Probability')
plt.title(f"Layer {layer_idx + 1} Node Assignments")
plt.xlabel("Clusters")
plt.ylabel("Nodes")
plt.show()

# --- Step 5: Plot Graph Per Layer ---
for layer_idx, S in enumerate(layer_assignments):
print(f"Shape of S for layer {layer_idx + 1}: {S.shape}")

if len(S.shape) == 3 and S.shape[0] == 1:
S = S.squeeze(0)

cluster_assignments = S.argmax(axis=1)
G_layer = nx.Graph()
G_layer.add_edges_from(data_homogeneous.edge_index.t().tolist())

colors = [cluster_assignments[node] for node in range(data_homogeneous.num_nodes)]

plt.figure(figsize=(10, 8))
nx.draw(
G_layer,
node_color=colors,
with_labels=True,
cmap='viridis',
node_size=300,
edge_color='gray'
)
plt.title(f"Graph Visualization at Layer {layer_idx + 1}")
plt.show()
[/code]
Вот распечатанный файл из приведенного выше сценария:

Слой 1 — форма x: torch.Size([100 , 32]), S shape: torch.Size([100,
70])

Но возвращает следующую ошибку.

{ "name": "RuntimeError", "message": "индекс 87 выходит за пределы
измерения 0 с размером 70", "stack":
"---- -------------------------------------------------- --------------------- RuntimeError Traceback (последний вызов
последний) Ячейка In[88], строка 94
92 для эпохи в диапазоне (100):
93Optimizer.zero_grad()
---> 94 x, Layer_assignments = model(data_homogeneous.x, data_homogeneous.edge_index)
96 # Использовать реконструированный x в качестве признаков для расчета потерь
97 восстановленных_функций = x
...
Файл
d:\test\.venv\lib\site-packages\torch_geometric\utils\scatter.py:75,
in scatter(src, index, dim, dim_size, уменьшить)
73, если уменьшить == 'сумма' или уменьшить == 'добавить':
74 index = Broadcast(index, src, dim)
---> 75 return src.new_zeros(size).scatter_add(dim, index, src)
77 if уменьшить == 'mean':
78 count = src.new_zeros(dim_size)Ошибка выполнения: индекс 87 выходит за пределы измерения 0 с размером 70"
}

Похоже, что количество кластеров (узлов в пуле) в матрице назначения S превышает количество доступных узлов на следующем уровне. [b]слой 1 - x shape: torch.Size([100, 32]), S shape: torch.Size([100, 20])[/b] означает, что у нас есть 100 узлов с 32 объектами в x, тогда как матрица назначения [b]S[/b] сокращает эти 100 узлов на 20 кластеров.
Я пробовал разные настройки, например, меняя поправочный коэффициент num_clusters = max(8, int(current_num_nodes * 0.8)) на разные значения, но это не сработает. Кажется, мне не хватает некоторых основ, но я не знаю, как их найти.
Настоятельно рекомендуем ваши идеи. 

Подробнее здесь: [url]https://stackoverflow.com/questions/79198686/diffpool-implementation-in-pytorch-for-unsipervised-clustering-of-homogeneous-gr[/url]