# PyTorch Geometric GCN 模型训练与验证:基于自定义数据集的图神经网络
import os
import pandas as pd
import torch
import torch.nn as nn
from torch_geometric.data import Data, DataLoader
from torch_geometric.nn import GCNConv
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
# 加载数据并创建PyG数据集类:
class MyDataset(torch.utils.data.Dataset):
    """Dataset that builds one PyG ``Data`` object per row of the CSV files.

    Every item uses the complete contents of ``edges.csv`` as its edge list.
    Row ``idx`` of ``features1.csv`` and ``features2.csv`` supplies the two
    feature columns of item ``idx``, and row ``idx`` of ``labels.csv``
    supplies its labels.
    """

    def __init__(self, root, transform=None, pre_transform=None):
        """Read the four header-less CSV files located in ``root``.

        Args:
            root: Directory containing ``edges.csv``, ``features1.csv``,
                ``features2.csv`` and ``labels.csv``.
            transform: Optional callable applied to each ``Data`` object
                just before ``__getitem__`` returns it.
            pre_transform: Stored on the instance only; this class never
                calls it.
        """
        self.edges = pd.read_csv(os.path.join(root, 'edges.csv'), header=None)
        self.features1 = pd.read_csv(os.path.join(root, 'features1.csv'), header=None)
        self.features2 = pd.read_csv(os.path.join(root, 'features2.csv'), header=None)
        self.labels = pd.read_csv(os.path.join(root, 'labels.csv'), header=None)
        self.transform = transform
        self.pre_transform = pre_transform

    def __len__(self):
        """Return the number of items, i.e. the number of rows in ``labels.csv``."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Build and return the ``Data`` object for row ``idx``.

        Args:
            idx: Integer row position into the feature and label tables.

        Returns:
            A ``Data`` object carrying ``x``, ``edge_index``, ``y``,
            ``train_mask`` and ``val_mask`` (after ``transform``, if set).
        """
        # Transpose so that every CSV row becomes one column of edge_index.
        # Assumes edges.csv has two columns, giving shape (2, num_edges)
        # -- TODO confirm against the data files.
        edge_index = torch.tensor(self.edges.values, dtype=torch.long).t().contiguous()

        # Row idx of each feature table is a 1-D vector of length m; stacking
        # the two vectors along dim=1 places them side by side as columns,
        # producing an (m, 2) matrix.
        x = torch.stack(
            [
                torch.tensor(self.features1.iloc[idx].values, dtype=torch.float),
                torch.tensor(self.features2.iloc[idx].values, dtype=torch.float),
            ],
            dim=1,
        )
        y = torch.tensor(self.labels.iloc[idx].values, dtype=torch.long)

        # Define the train/validation masks, one entry per label.
        # NOTE(review): the masks combine the item index with label positions:
        # for idx < 16 the first 16 entries are marked for training and none
        # for validation; for idx >= 16 the entries from position 16 onward are
        # marked for validation and none for training. Confirm this is the
        # intended split.
        train_mask = torch.zeros(y.size(0), dtype=torch.bool)
        val_mask = torch.zeros(y.size(0), dtype=torch.bool)
        # bool(...) keeps the assignment well-defined when idx is a NumPy
        # integer rather than a Python int.
        train_mask[:16] = bool(idx < 16)
        val_mask[16:] = bool(idx >= 16)

        data = Data(x=x, edge_index=edge_index, y=y, train_mask=train_mask, val_mask=val_mask)
        if self.transform is not None:
            data = self.transform(data)
        return data
# 定义GCN模型:
class GCN(torch.nn.Module):
    """Three stacked ``GCNConv`` layers: input -> 16 -> 32 -> ``num_classes``."""

    def __init__(self, num_node_features, num_classes):
        """Create the three convolution layers.

        Args:
            num_node_features: Input width of the first layer.
            num_classes: Output width of the last layer.
        """
        super().__init__()
        self.conv1 = GCNConv(num_node_features, 16)
        self.conv2 = GCNConv(16, 32)
        self.conv3 = GCNConv(32, num_classes)

    def forward(self, data):
        """Return the output of the last layer for ``data``.

        Args:
            data: Object exposing ``x`` and ``edge_index`` attributes.

        Returns:
            The result of ``conv3``; no softmax is applied, so the values
            are unnormalized scores.
        """
        x, edge_index = data.x, data.edge_index
        x = F.relu(self.conv1(x, edge_index))
        x = F.relu(self.conv2(x, edge_index))
        # Dropout is only applied while the module is in training mode.
        x = F.dropout(x, training=self.training)
        return self.conv3(x, edge_index)
# class GCN(nn.Module):
# def __init__(self, num_node_features, num_classes):
# super(GCN, self).__init__()
# self.conv1 = GCNConv(num_node_features, 32)
# self.conv2 = GCNConv(32, num_classes)
# def forward(self, data):
# x, edge_index = data.x, data.edge_index
# x = self.conv1(x, edge_index)
# x = F.relu(x)
# x = F.dropout(x, training=self.training)
# x = self.conv2(x, edge_index)
# return F.log_softmax(x, dim=1)
# 创建训练和验证模型:
def train_model(dataset, model, optimizer, device):
    """Run one training pass over ``dataset`` and return the mean loss.

    Args:
        dataset: Iterable of graph objects exposing ``to(device)``, ``y``
            and ``train_mask``.
        model: Module called as ``model(data)``; its output is indexed with
            ``data.train_mask`` and fed to cross-entropy.
        optimizer: Optimizer stepping the parameters of ``model``.
        device: Device every graph is moved to before the forward pass.

    Returns:
        The loss averaged over the batches that contained at least one
        training entry, or ``0.0`` when no batch contributed.
    """
    model.train()
    total_loss = 0.0
    num_batches = 0
    for data in dataset:
        data = data.to(device)
        mask = data.train_mask
        if not bool(mask.any()):
            # Cross-entropy averaged over zero rows is NaN, which would turn
            # the whole epoch loss into NaN; such a batch carries no training
            # signal, so skip it.
            continue
        optimizer.zero_grad()
        output = model(data)
        loss = F.cross_entropy(output[mask], data.y[mask])
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        num_batches += 1
    return total_loss / num_batches if num_batches else 0.0
def validate_model(dataset, model, device):
    """Return the accuracy of ``model`` on the validation entries of ``dataset``.

    Args:
        dataset: Iterable of graph objects exposing ``to(device)``, ``y``
            and ``val_mask``.
        model: Module called as ``model(data)``; the arg-max over dim 1 of
            its masked output is taken as the prediction.
        device: Device every graph is moved to before the forward pass.

    Returns:
        ``correct / total`` over all entries selected by ``val_mask``, or
        ``0.0`` when no entry is selected at all.
    """
    model.eval()
    correct = 0
    total = 0
    # Evaluation needs no gradients; disabling them avoids building the
    # autograd graph for every forward pass.
    with torch.no_grad():
        for data in dataset:
            data = data.to(device)
            mask = data.val_mask
            output = model(data)
            predicted = torch.max(output[mask], 1).indices
            total += mask.sum().item()
            correct += (predicted == data.y[mask]).sum().item()
    # Guard against a split in which every val_mask is empty.
    return correct / total if total else 0.0
# 加载数据集、创建模型、定义优化器和训练循环,以及验证模型:
if __name__ == '__main__':
    # Raw string literal: in a normal string "\U" starts a unicode escape
    # (a SyntaxError for this path) and "\r" would become a carriage return.
    dataset = MyDataset(root=r"C:\Users\jh\Desktop\data\raw1")

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = GCN(num_node_features=2, num_classes=3).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

    # Split the dataset into a training part (80%) and a validation part
    # (20%, test_size=0.2).
    train_dataset, val_dataset = train_test_split(dataset, test_size=0.2)
    train_loader = DataLoader(train_dataset, batch_size=1, shuffle=False)
    val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False)

    # One training pass followed by one validation pass per epoch.
    epochs = 2000
    for epoch in range(epochs):
        train_loss = train_model(train_loader, model, optimizer, device)
        val_accuracy = validate_model(val_loader, model, device)
        print(f'Epoch {epoch+1}/{epochs} , Train Loss: {train_loss:.4f} , Val_Acc: {val_accuracy:.4f}')
# 原文地址: https://www.cveoy.top/t/topic/i93k 著作权归作者所有。请勿转载和采集!