基于PyTorch Geometric的图神经网络模型训练与评估
您好!下面的代码使用 PyTorch Geometric(PyG)库和自定义数据集类 MyDataset 实现上述需求:
import torch
from torch_geometric.data import Dataset
from torch_geometric.data import Data
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.data import DataLoader
class MyDataset(Dataset):
    """Graph dataset assembled from four whitespace-separated text files.

    The raw files are ``edges.txt``, ``features1.txt``, ``features2.txt`` and
    ``label.txt``.  ``process`` turns them into ``NUM_GRAPHS`` ``Data`` objects
    whose node-feature matrix has two columns (one per feature file) and
    stores them as one list in the processed file.

    Args:
        root: Dataset root directory, forwarded to the base class.
        transform: Optional per-access transform, forwarded to the base class.
        pre_transform: Optional transform applied once to every graph before
            it is written to the processed file.
    """

    # Number of graphs built from the raw files.
    NUM_GRAPHS = 10
    # Count of integers in ``edges.txt`` that belong to one graph.  They are
    # reshaped to ``(2, -1)``, i.e. 20 edges per graph.
    EDGE_VALUES_PER_GRAPH = 40

    def __init__(self, root, transform=None, pre_transform=None):
        # Created before the base constructor runs, because the base
        # constructor may already call ``process``, which fills this cache.
        self._data_list = None
        super().__init__(root, transform, pre_transform)

    @property
    def raw_file_names(self):
        """Names of the raw input files, in the order ``process`` reads them."""
        return ['edges.txt', 'features1.txt', 'features2.txt', 'label.txt']

    @property
    def processed_file_names(self):
        """Name of the single file that holds the processed list of graphs."""
        return ['data.pt']

    def download(self):
        """Do nothing: the raw files are expected to be present locally."""
        pass

    @staticmethod
    def _read_rows(path, cast):
        """Parse every line of ``path`` into a list of ``cast``-converted tokens."""
        with open(path, 'r') as file:
            return [[cast(token) for token in line.split()] for line in file]

    def process(self):
        """Build the ``Data`` objects from the raw files and save them.

        Raises:
            ValueError: If a raw file holds less data than ``NUM_GRAPHS``
                graphs require.
        """
        # Edge endpoints: one flat stream of integers for all graphs.
        with open(self.raw_paths[0], 'r') as file:
            edges = [int(token) for token in file.read().split()]
        # Features are parsed as floats because they end up in float tensors
        # anyway; integer-valued files yield the same tensors as before.
        features1 = self._read_rows(self.raw_paths[1], float)
        features2 = self._read_rows(self.raw_paths[2], float)
        labels = self._read_rows(self.raw_paths[3], int)

        span = self.EDGE_VALUES_PER_GRAPH
        if len(edges) < self.NUM_GRAPHS * span:
            raise ValueError(
                f'edges.txt holds {len(edges)} values, '
                f'expected at least {self.NUM_GRAPHS * span}'
            )
        if min(len(features1), len(features2), len(labels)) < self.NUM_GRAPHS:
            raise ValueError(
                f'feature/label files must each contain at least '
                f'{self.NUM_GRAPHS} lines'
            )

        data_list = []
        for i in range(self.NUM_GRAPHS):
            # First half of the chunk becomes row 0 of edge_index, second
            # half row 1.
            # NOTE(review): assumes the file lists all sources before all
            # targets for each graph -- confirm against the data format.
            edge_index = torch.tensor(
                edges[i * span:(i + 1) * span], dtype=torch.long
            ).view(2, -1)
            x1 = torch.tensor(features1[i], dtype=torch.float)
            x2 = torch.tensor(features2[i], dtype=torch.float)
            y = torch.tensor(labels[i], dtype=torch.long)
            # One row per node, one column per feature file.
            data = Data(x=torch.stack([x1, x2], dim=1), edge_index=edge_index, y=y)
            if self.pre_transform is not None:
                data = self.pre_transform(data)
            data_list.append(data)

        # A plain list is saved; ``collate`` belongs to ``InMemoryDataset``
        # and is not available on ``Dataset``.
        torch.save(data_list, self.processed_paths[0])
        self._data_list = data_list

    def _load(self):
        """Return the processed graphs, reading them from disk at most once."""
        if self._data_list is None:
            # NOTE(review): recent PyTorch releases default ``torch.load`` to
            # ``weights_only=True``; if loading ``Data`` objects is rejected,
            # pass ``weights_only=False`` here.
            self._data_list = torch.load(self.processed_paths[0])
        return self._data_list

    def len(self):
        """Return the number of graphs in the dataset."""
        return self.NUM_GRAPHS

    def get(self, idx):
        """Return the graph stored at position ``idx``."""
        import copy

        # Shallow copy so a later transform cannot rebind attributes on the
        # cached object.
        return copy.copy(self._load()[idx])
dataset = MyDataset(root='/path/to/dataset')
# Split the graphs 80/20 into training and validation subsets.  The mask is
# sized from the dataset itself: a mask longer than the dataset would select
# indices that do not exist.
num_graphs = len(dataset)
num_train = int(0.8 * num_graphs)
mask = torch.zeros(num_graphs, dtype=torch.bool)
mask[:num_train] = True
train_dataset = dataset[mask]
val_dataset = dataset[~mask]
# One graph per batch; only the training order is shuffled.
train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False)
class GCN(torch.nn.Module):
    """Two-layer graph convolutional network producing per-node scores.

    Args:
        in_channels: Size of each input node feature vector (default 2).
        hidden_channels: Output size of the first convolution (default 16).
        out_channels: Number of scores produced per node (default 2).
    """

    def __init__(self, in_channels=2, hidden_channels=16, out_channels=2):
        super().__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)

    def forward(self, data):
        """Apply both convolutions with a ReLU in between.

        Args:
            data: Object exposing ``x`` (node features) and ``edge_index``.

        Returns:
            The output of the second convolution, returned as-is (no softmax
            is applied here).
        """
        x, edge_index = data.x, data.edge_index
        x = F.relu(self.conv1(x, edge_index))
        return self.conv2(x, edge_index)
# Run on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
# Build the network on the chosen device and optimise it with Adam.
model = GCN().to(device)
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.01)
def train():
    """Run one optimisation pass over ``train_loader``.

    Returns:
        The sum of per-batch cross-entropy losses, each weighted by the
        number of graphs in its batch, divided by the size of the training
        dataset.
    """
    model.train()
    running_loss = 0
    for batch in train_loader:
        batch = batch.to(device)
        optimizer.zero_grad()
        batch_loss = F.cross_entropy(model(batch), batch.y)
        batch_loss.backward()
        optimizer.step()
        # Weight by graph count so the final division gives a per-graph mean.
        running_loss += batch_loss.item() * batch.num_graphs
    return running_loss / len(train_loader.dataset)
def test(loader):
    """Evaluate the model on every batch of ``loader``.

    Args:
        loader: Iterable of batches exposing ``y``, ``num_graphs`` and
            ``to(device)``, with a ``dataset`` attribute that supports
            ``len``.

    Returns:
        A ``(accuracy, loss)`` tuple.  ``accuracy`` is the fraction of label
        entries predicted correctly; ``loss`` is the graph-weighted mean
        cross-entropy.  Both are ``0.0`` when there is nothing to evaluate.
    """
    model.eval()
    correct = 0
    total = 0
    val_loss = 0.0
    # Evaluation needs no gradients; disabling them avoids building the
    # autograd graph.
    with torch.no_grad():
        for data in loader:
            data = data.to(device)
            out = model(data)
            val_loss += F.cross_entropy(out, data.y).item() * data.num_graphs
            pred = out.argmax(dim=1)
            correct += (pred == data.y).sum().item()
            # ``correct`` counts individual predictions, so the denominator
            # must count label entries rather than graphs.
            total += data.y.numel()
    num_graphs = len(loader.dataset)
    accuracy = correct / total if total else 0.0
    mean_loss = val_loss / num_graphs if num_graphs else 0.0
    return accuracy, mean_loss
# Train for 200 epochs, reporting loss and both accuracies after each one.
for epoch in range(1, 201):
    loss = train()
    # ``test`` returns (accuracy, loss); only the accuracy is reported.
    train_acc = test(train_loader)[0]
    val_acc = test(val_loader)[0]
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, Train Acc: {train_acc:.4f}, Val Acc: {val_acc:.4f}')
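请将 '/path/to/dataset' 替换为数据集的根目录;PyG 会从该目录下的 raw/ 子目录读取 edges.txt、features1.txt、features2.txt 和 label.txt,并把处理结果写入 processed/ 子目录。训练过程中每个 epoch 都会输出损失以及训练集和验证集的准确率。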
原文地址: https://www.cveoy.top/t/topic/m1lw 著作权归作者所有。请勿转载和采集!