基于GCN的多标签节点分类:使用CNN降维和PYG库
基于GCN的多标签节点分类:使用CNN降维和PYG库
本项目旨在利用图卷积网络(GCN)对节点进行多标签分类。
数据集信息:
- 图数量:num_graphs = 42
- 节点数量:num_nodes = 37
- 图像尺寸:image_size = 40
- 标签数量:num_labels = 8
- 边数量:num_edges = 61
数据格式:
- 节点特征:每个节点的像素值存储在'C:\Users\jh\Desktop\data\input\images\{i}.png_{j}.png'文件中,其中i表示图序号(1-42),j表示节点序号(0-36)。
- 节点标签:每个节点的8维标签向量存储在'C:\Users\jh\Desktop\data\input\labels\{i}_{j}.txt'文件中,标签用空格隔开。真实标签值只有0、1、2、3、4五个类别,例如节点157的标签向量为:'2 2 1 1 3 1 2 1'。
- 边关系:存储在'C:\Users\jh\Desktop\data\input\edges_L.csv'文件中,表格中没有header,第一列为源节点,第二列为目标节点,共有61条无向边。
模型训练:
- 使用CNN对节点像素特征进行降维。
- 将每个图的前30个节点颜色特征加入训练掩码,后7个节点颜色特征加入验证掩码。
- 使用PYG库建立GCN网络实现多标签分类任务。
- 损失函数使用torch.nn模块中的MultiLabelSoftMarginLoss。
代码示例:
import os

import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torch_geometric.data import Data, Batch
from torch_geometric.nn import GCNConv
from torchvision import transforms
# Define dataset class for node features
class NodeFeatureDataset(Dataset):
    """Node images of every graph, exposed as one flat, index-addressable dataset.

    Flat index ``idx`` addresses graph ``idx // num_nodes`` and node
    ``idx % num_nodes``. Graph numbers in file names are 1-based, node numbers
    are 0-based, giving files named ``<graph>.png_<node>.png``.

    Args:
        num_graphs: Number of graphs in the dataset.
        num_nodes: Number of nodes per graph.
        image_size: Side length every image is resized to.
        image_dir: Directory holding the node images. Defaults to the
            location used by the original script.
    """

    # The default is a raw string: in a normal literal the "\U" of "C:\Users"
    # starts a unicode escape and the file fails to compile.
    def __init__(self, num_graphs, num_nodes, image_size,
                 image_dir=r'C:\Users\jh\Desktop\data\input\images'):
        self.num_graphs = num_graphs
        self.num_nodes = num_nodes
        self.image_size = image_size
        self.image_dir = image_dir
        # Built once here instead of on every __getitem__ call.
        self._transform = transforms.Compose([
            transforms.Resize((self.image_size, self.image_size)),
            transforms.ToTensor(),
        ])

    def __len__(self):
        """Return the total number of nodes across all graphs."""
        return self.num_graphs * self.num_nodes

    def image_path(self, idx):
        """Return the image file path for flat node index ``idx``.

        Raises:
            IndexError: If ``idx`` is outside ``[0, len(self))``.
        """
        if not 0 <= idx < len(self):
            raise IndexError(
                f'node index {idx} out of range for {len(self)} nodes')
        graph_idx, node_idx = divmod(idx, self.num_nodes)
        file_name = f'{graph_idx + 1}.png_{node_idx}.png'
        return os.path.join(self.image_dir, file_name)

    def __getitem__(self, idx):
        """Load, resize and convert the image of node ``idx`` to a tensor.

        Raises:
            IndexError: If ``idx`` is outside ``[0, len(self))``.
        """
        path = self.image_path(idx)
        # PIL opens lazily; the context manager makes sure the file handle is
        # released once the pixels have been converted.
        with Image.open(path) as image:
            return self._transform(image)
# Define dataset class for labels
class LabelDataset(Dataset):
    """Per-node label vectors of every graph, exposed as one flat dataset.

    Flat index ``idx`` addresses graph ``idx // num_nodes`` and node
    ``idx % num_nodes``. Each label file is named ``<graph>_<node>.txt``
    (graph number 1-based, node number 0-based) and holds whitespace-separated
    integers.

    Args:
        num_graphs: Number of graphs in the dataset.
        num_nodes: Number of nodes per graph.
        label_dir: Directory holding the label files. Defaults to the
            location used by the original script.
    """

    # The default is a raw string: in a normal literal the "\U" of "C:\Users"
    # starts a unicode escape and the file fails to compile.
    def __init__(self, num_graphs, num_nodes,
                 label_dir=r'C:\Users\jh\Desktop\data\input\labels'):
        self.num_graphs = num_graphs
        self.num_nodes = num_nodes
        self.label_dir = label_dir

    def __len__(self):
        """Return the total number of nodes across all graphs."""
        return self.num_graphs * self.num_nodes

    def __getitem__(self, idx):
        """Return the label vector of node ``idx`` as a list of ints.

        Raises:
            IndexError: If ``idx`` is outside ``[0, len(self))``.
            ValueError: If the file contains a token that is not an integer.
        """
        if not 0 <= idx < len(self):
            raise IndexError(
                f'node index {idx} out of range for {len(self)} nodes')
        graph_idx, node_idx = divmod(idx, self.num_nodes)
        label_path = os.path.join(
            self.label_dir, f'{graph_idx + 1}_{node_idx}.txt')
        with open(label_path, 'r') as f:
            return [int(token) for token in f.read().split()]
# Define dataset class for edges
class EdgeDataset(Dataset):
    """Edges read from a header-less CSV file, one ``[source, target]`` per row.

    Args:
        num_edges: Number of edges the dataset exposes (rows used from the
            CSV).
        edges_path: Path to the CSV file. Defaults to the location used by the
            original script.
    """

    # The default is a raw string: in a normal literal the "\U" of "C:\Users"
    # starts a unicode escape and the file fails to compile.
    def __init__(self, num_edges,
                 edges_path=r'C:\Users\jh\Desktop\data\input\edges_L.csv'):
        self.num_edges = num_edges
        self.edges_path = edges_path
        # Filled on first access so the CSV is parsed once, not once per edge.
        self._edges_df = None

    def __len__(self):
        """Return the number of edges exposed by the dataset."""
        return self.num_edges

    def __getitem__(self, idx):
        """Return row ``idx`` of the edge table as a list.

        Raises:
            IndexError: If ``idx`` is outside ``[0, num_edges)`` or the CSV
                has fewer rows than ``num_edges``.
        """
        if not 0 <= idx < self.num_edges:
            raise IndexError(
                f'edge index {idx} out of range for {self.num_edges} edges')
        if self._edges_df is None:
            self._edges_df = pd.read_csv(self.edges_path, header=None)
        return self._edges_df.iloc[idx].tolist()
# Define GCN model
class GCN(nn.Module):
    """Two-layer graph convolutional network.

    Applies ``GCNConv(num_features -> hidden_dim)``, a ReLU, then
    ``GCNConv(hidden_dim -> num_labels)``. The output is returned as raw
    scores; no activation is applied after the second layer.

    Args:
        num_features: Size of each input node feature vector.
        hidden_dim: Size of the hidden node representation.
        num_labels: Number of output scores per node.
    """

    def __init__(self, num_features, hidden_dim, num_labels):
        super().__init__()
        self.conv1 = GCNConv(num_features, hidden_dim)
        self.conv2 = GCNConv(hidden_dim, num_labels)

    def forward(self, x, edge_index):
        """Return per-node output scores.

        Args:
            x: Node feature matrix passed to the first convolution.
            edge_index: Graph connectivity passed to both convolutions.
        """
        hidden = F.relu(self.conv1(x, edge_index))
        return self.conv2(hidden, edge_index)
# Hyperparameters
num_graphs = 42
num_nodes = 37
image_size = 40
num_labels = 8
num_edges = 61
hidden_dim = 16
num_epochs = 10
batch_size = 32          # number of graphs (not nodes) per optimisation step
learning_rate = 0.01
num_classes = 5          # every label position takes a value in {0, 1, 2, 3, 4}
num_train_nodes = 30     # first 30 nodes of each graph train, the rest validate
cnn_feature_dim = 32     # size of the node embedding produced by the CNN

# Create datasets
node_feature_dataset = NodeFeatureDataset(num_graphs, num_nodes, image_size)
label_dataset = LabelDataset(num_graphs, num_nodes)
edge_dataset = EdgeDataset(num_edges)


class NodeImageEncoder(nn.Module):
    """Small CNN that reduces a node image to a ``out_dim``-sized vector."""

    def __init__(self, in_channels, out_dim):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(in_channels, 8, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(8, 16, kernel_size=3, padding=1),
            nn.ReLU(),
            # Global average pooling keeps the encoder independent of the
            # spatial size of the input images.
            nn.AdaptiveAvgPool2d(1),
            nn.Flatten(),
        )
        self.project = nn.Linear(16, out_dim)

    def forward(self, images):
        return self.project(self.features(images))


def build_edge_index():
    """Return a (2, 2 * num_edges) edge index holding both edge directions.

    NOTE(review): assumes the CSV lists node ids in 0..num_nodes-1 and that
    the same edge list applies to every graph - confirm against the data.
    """
    pairs = [edge_dataset[i] for i in range(len(edge_dataset))]
    directed = torch.tensor(pairs, dtype=torch.long).t().contiguous()
    if directed.numel() and (directed.min() < 0 or directed.max() >= num_nodes):
        raise ValueError(
            f'edge list references node ids outside 0..{num_nodes - 1}')
    # The edges are undirected, GCNConv needs each of them in both directions.
    return torch.cat([directed, directed.flip(0)], dim=1)


def build_graph(graph_idx, edge_index, train_mask):
    """Assemble the ``Data`` object of graph ``graph_idx`` (0-based)."""
    first = graph_idx * num_nodes
    node_ids = range(first, first + num_nodes)
    # Features and labels are read with the same node ids so they stay aligned.
    x = torch.stack([node_feature_dataset[i] for i in node_ids])
    y = torch.tensor([label_dataset[i] for i in node_ids], dtype=torch.long)
    if y.shape != (num_nodes, num_labels):
        raise ValueError(
            f'graph {graph_idx + 1}: expected labels of shape '
            f'{(num_nodes, num_labels)}, got {tuple(y.shape)}')
    if y.min() < 0 or y.max() >= num_classes:
        raise ValueError(
            f'graph {graph_idx + 1}: label values must lie in '
            f'0..{num_classes - 1}')
    return Data(x=x, edge_index=edge_index, y=y, train_mask=train_mask,
                val_mask=~train_mask, num_nodes=num_nodes)


def encode_targets(y):
    """One-hot encode (N, num_labels) class ids to (N, num_labels * num_classes).

    MultiLabelSoftMarginLoss expects binary targets, so each of the
    ``num_labels`` positions is expanded into ``num_classes`` indicators.
    """
    return F.one_hot(y, num_classes).flatten(1).float()


def forward_pass(batch):
    """Run the CNN encoder followed by the GCN on a batch of graphs."""
    return model(encoder(batch.x), batch.edge_index)


# Build one graph per sample; the node order inside a graph is never shuffled
# because the masks and the edge list refer to node positions.
train_mask = torch.arange(num_nodes) < num_train_nodes
edge_index = build_edge_index()
graphs = [build_graph(g, edge_index, train_mask) for g in range(num_graphs)]
full_batch = Batch.from_data_list(graphs)

# Create CNN encoder and GCN model
encoder = NodeImageEncoder(graphs[0].x.size(1), cnn_feature_dim)
model = GCN(cnn_feature_dim, hidden_dim, num_labels * num_classes)

# Define optimizer and loss function
optimizer = optim.Adam(
    list(encoder.parameters()) + list(model.parameters()), lr=learning_rate)
loss_fn = nn.MultiLabelSoftMarginLoss()

# Training loop
for epoch in range(num_epochs):
    encoder.train()
    model.train()
    total_loss = 0
    order = torch.randperm(num_graphs).tolist()
    for start in range(0, num_graphs, batch_size):
        batch = Batch.from_data_list(
            [graphs[i] for i in order[start:start + batch_size]])
        optimizer.zero_grad()
        output = forward_pass(batch)
        targets = encode_targets(batch.y)
        # Only the training nodes contribute to the loss.
        loss = loss_fn(output[batch.train_mask], targets[batch.train_mask])
        total_loss += loss.item()
        loss.backward()
        optimizer.step()
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {total_loss}')

    encoder.eval()
    model.eval()
    with torch.no_grad():
        output = forward_pass(full_batch)
        val_loss = loss_fn(output[full_batch.val_mask],
                           encode_targets(full_batch.y)[full_batch.val_mask])
    print(f'Validation loss: {val_loss.item()}')

# Predict node features and labels in a single, unshuffled pass so that row i
# of the outputs corresponds to flat node index i of the datasets.
encoder.eval()
model.eval()
with torch.no_grad():
    all_node_features = forward_pass(full_batch)
# Each label position is decoded as the class with the highest score.
all_labels = all_node_features.view(-1, num_labels, num_classes).argmax(dim=-1)
print('Predicted node features:')
print(all_node_features)
print('Predicted labels:')
print(all_labels)
输出结果:
代码运行后将输出每个节点的预测特征表示和预测标签向量。
总结:
本项目展示了如何使用GCN进行多标签节点分类,并结合CNN进行特征降维。通过PYG库的运用,简化了图神经网络的构建过程。最终,代码成功输出每个节点的预测特征表示和预测标签向量,实现了多标签节点分类的目标。
原文地址: https://www.cveoy.top/t/topic/pk5F 著作权归作者所有。请勿转载和采集!