Я пытаюсь использовать слой сети свертки геометрических гиперграфов Pytorch для обучения наборам данных гиперграфов. Я использую набор данных обзора Amazon, предоставленный https://www.cs.cornell.edu/~arb/data/amazon-reviews/
Поскольку узел не имеет функции , я просто помещаю константы во все узлы как функции. Далее для гиперребер я строю матрицу инцидентности.
Сначала загрузите набор данных командой:
!wget -P /tmp https://github.com/gravitogen/hosting_datasets/releases/download/amazon_review_1.0/amazon-reviews.zip
!unzip /tmp/amazon-reviews.zip
< /code>
Код ниже: < /p>
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import HypergraphConv
import numpy as np
import pandas as pd
class HyperGraph1(nn.Module):
def __init__(self, nfeatures, nhiddden, nclass, dropout):
super(HyperGraph1, self).__init__()
self.conv1 = HypergraphConv(nfeatures, nhiddden)
self.conv2 = HypergraphConv(nhiddden, nclass)
self.dropout_p = dropout
def forward(self, data):
x, edge_index = data.x, data.edge_index
x = self.conv1(x, edge_index)
x = F.relu(x)
x = F.dropout(x, training=self.training)
x = self.conv2(x, edge_index)
return F.log_softmax(x, dim=1)
folder = 'amazon-reviews/'
node_indices_files = 'node-labels-amazon-reviews.txt'
node_label_files = 'label-names-amazon-reviews.txt'
hyperedge_files = 'hyperedges-amazon-reviews.txt'
node_indices = pd.read_csv(folder + node_indices_files, header=None)
labels_annot = pd.read_csv(folder + node_label_files, header = None)
hyper_edge = []
with open(folder + hyperedge_files) as f:
for line in f.readlines():
chunks = line.split(',')
chunks = [int(i) for i in chunks]
hyper_edge.append(chunks)
nodes = node_indices.index.to_list()
labels = node_indices[node_indices.columns[0]].to_list()
# generating sparse incidence matrix for storing hyperedges
node_on_edge_list = []
edge_indices = []
counter = 0
for l in hyper_edge:
node_on_edge_list += l
edge_indices += [counter]*len(l)
counter = counter + 1
hyperedge_index = torch.tensor([ node_on_edge_list, edge_indices])
# generate `Data` for pytorch-geometric
x = np.zeros(len(nodes)*2, dtype=int)
x = x.reshape(len(nodes),2).tolist()
X = torch.tensor(x, dtype=torch.float)
Y = torch.tensor(labels, dtype=torch.int)
from torch_geometric.data import Data
data = Data(x=X, edge_index=hyperedge_index, y=Y)
# Now get ready for training
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = HyperGraph1(nfeatures=2, nhiddden=16, nclass=labels_annot.shape[0],dropout=0.1).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
model.train()
random_nums = np.random.rand(1, len(data.y))
train_np = random_nums > 0.5
train_mask = train_np.tolist()[0]
test_np = random_nums 3 out = model(data)
4 loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
5 loss.backward()
~/anaconda3/envs/gcn/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1049 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1050 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1051 return forward_call(*input, **kwargs)
1052 # Do not call functions when jit is used
1053 full_backward_hooks, non_full_backward_hooks = [], []
in forward(self, data)
8 def forward(self, data):
9 x, edge_index = data.x, data.edge_index
---> 10 x = self.conv1(x, edge_index)
11 x = F.relu(x)
12 x = F.dropout(x, training=self.training)
~/anaconda3/envs/gcn/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1049 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1050 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1051 return forward_call(*input, **kwargs)
1052 # Do not call functions when jit is used
1053 full_backward_hooks, non_full_backward_hooks = [], []
~/anaconda3/envs/gcn/lib/python3.7/site-packages/torch_geometric/nn/conv/hypergraph_conv.py in forward(self, x, hyperedge_index, hyperedge_weight, hyperedge_attr)
139
140 D = scatter_add(hyperedge_weight[hyperedge_index[1]],
--> 141 hyperedge_index[0], dim=0, dim_size=num_nodes)
142 D = 1.0 / D
143 D[D == float("inf")] = 0
~/anaconda3/envs/gcn/lib/python3.7/site-packages/torch_scatter/scatter.py in scatter_add(src, index, dim, out, dim_size)
27 out: Optional[torch.Tensor] = None,
28 dim_size: Optional[int] = None) -> torch.Tensor:
---> 29 return scatter_sum(src, index, dim, out, dim_size)
30
31
~/anaconda3/envs/gcn/lib/python3.7/site-packages/torch_scatter/scatter.py in scatter_sum(src, index, dim, out, dim_size)
19 size[dim] = int(index.max()) + 1
20 out = torch.zeros(size, dtype=src.dtype, device=src.device)
---> 21 return out.scatter_add_(dim, index, src)
22 else:
23 return out.scatter_add_(dim, index, src)
RuntimeError: index 2268264 is out of bounds for dimension 0 with size 2268264
Я не знаю, как исправить это. Примечание 2268264-это количество узлов на графике.
i имитировал конструкцию набора данных от https: // pytorch-геометрический. readthedocs.io/en/latest/notes/introduction.html, так что я не уверен, какую ошибку я сделал здесь.
Я пытаюсь использовать слой сети свертки геометрических гиперграфов Pytorch для обучения наборам данных гиперграфов. Я использую набор данных обзора Amazon, предоставленный https://www.cs.cornell.edu/~arb/data/amazon-reviews/ Поскольку узел не имеет функции , я просто помещаю константы во все узлы как функции. Далее для гиперребер я строю матрицу инцидентности. Сначала загрузите набор данных командой: [code]!wget -P /tmp https://github.com/gravitogen/hosting_datasets/releases/download/amazon_review_1.0/amazon-reviews.zip !unzip /tmp/amazon-reviews.zip < /code> Код ниже: < /p> import torch import torch.nn as nn import torch.nn.functional as F from torch_geometric.nn import HypergraphConv import numpy as np import pandas as pd
labels_annot = pd.read_csv(folder + node_label_files, header = None) hyper_edge = [] with open(folder + hyperedge_files) as f: for line in f.readlines(): chunks = line.split(',') chunks = [int(i) for i in chunks] hyper_edge.append(chunks) nodes = node_indices.index.to_list() labels = node_indices[node_indices.columns[0]].to_list() # generating sparse incidence matrix for storing hyperedges node_on_edge_list = [] edge_indices = [] counter = 0 for l in hyper_edge: node_on_edge_list += l edge_indices += [counter]*len(l) counter = counter + 1
hyperedge_index = torch.tensor([ node_on_edge_list, edge_indices]) # generate `Data` for pytorch-geometric x = np.zeros(len(nodes)*2, dtype=int) x = x.reshape(len(nodes),2).tolist() X = torch.tensor(x, dtype=torch.float) Y = torch.tensor(labels, dtype=torch.int) from torch_geometric.data import Data data = Data(x=X, edge_index=hyperedge_index, y=Y) # Now get ready for training device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') model = HyperGraph1(nfeatures=2, nhiddden=16, nclass=labels_annot.shape[0],dropout=0.1).to(device) optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4) model.train() random_nums = np.random.rand(1, len(data.y)) train_np = random_nums > 0.5 train_mask = train_np.tolist()[0]
test_np = random_nums 3 out = model(data) 4 loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask]) 5 loss.backward()
~/anaconda3/envs/gcn/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs) 1049 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks 1050 or _global_forward_hooks or _global_forward_pre_hooks): -> 1051 return forward_call(*input, **kwargs) 1052 # Do not call functions when jit is used 1053 full_backward_hooks, non_full_backward_hooks = [], []
in forward(self, data) 8 def forward(self, data): 9 x, edge_index = data.x, data.edge_index ---> 10 x = self.conv1(x, edge_index) 11 x = F.relu(x) 12 x = F.dropout(x, training=self.training)
~/anaconda3/envs/gcn/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs) 1049 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks 1050 or _global_forward_hooks or _global_forward_pre_hooks): -> 1051 return forward_call(*input, **kwargs) 1052 # Do not call functions when jit is used 1053 full_backward_hooks, non_full_backward_hooks = [], []
~/anaconda3/envs/gcn/lib/python3.7/site-packages/torch_geometric/nn/conv/hypergraph_conv.py in forward(self, x, hyperedge_index, hyperedge_weight, hyperedge_attr) 139 140 D = scatter_add(hyperedge_weight[hyperedge_index[1]], --> 141 hyperedge_index[0], dim=0, dim_size=num_nodes) 142 D = 1.0 / D 143 D[D == float("inf")] = 0
RuntimeError: index 2268264 is out of bounds for dimension 0 with size 2268264 [/code] Я не знаю, как исправить это. Примечание 2268264-это количество узлов на графике. i имитировал конструкцию набора данных от https: // pytorch-геометрический. readthedocs.io/en/latest/notes/introduction.html, так что я не уверен, какую ошибку я сделал здесь.