基于CNN-GCN的节点特征提取和标签预测
基于CNN-GCN的节点特征提取和标签预测
本项目使用CNN提取节点像素特征,并将其作为GCN网络的输入,以预测每个节点的标签向量。
数据集信息:
- 图数量: num_graphs = 42
- 节点数量: num_nodes = 37
- 图片尺寸: image_size = 40
- 标签维度: num_labels = 8
- 边数量: num_edges = 61
数据文件路径:
- 节点特征: 'C:\Users\jh\Desktop\data\input\images{i}.png_{j}.png',其中i表示图序号 (1到42),j表示节点序号 (0到36),每个节点特征为图片的像素值。
- 节点标签: 'C:\Users\jh\Desktop\data\input\labels{i}_{j}.txt',每个文件包含8个用空格隔开的标签值,真实标签值只有0、1、2、3、4五个类别。
- 边关系: 'C:\Users\jh\Desktop\data\input\edges_L.csv',csv 文件中没有表头,第一列为源节点,第二列为目标节点,共有61条无向边。
模型结构:
- CNN网络: 用于提取节点像素特征,包含两层卷积层和一层全连接层。
- GCN网络: 基于图卷积神经网络,包含两层GCNConv层。
训练过程:
- 将每个图的前30个节点(像素特征)加入训练掩码,后7个节点加入验证掩码。
- 使用PyTorch Geometric库建立GCN网络。
- 使用Adam优化器训练模型。
输出结果:
- 输出每个节点的预测特征表示。
- 根据预测特征得到预测标签向量。
代码:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv
num_graphs = 42
num_nodes = 37
image_size = 40
num_labels = 8
num_edges = 61
# Define the CNN network
class CNN(nn.Module):
    """Extract a 128-dimensional feature vector from each node image.

    Architecture: two 3x3 convolutions (1 -> 16 -> 32 channels), each followed
    by ReLU and 2x2 max-pooling, then one fully connected layer with ReLU.
    The two pooling steps reduce each spatial side by a factor of 4, which is
    what the ``input_size // 4`` sizing of the fully connected layer relies on.

    Args:
        input_size: Side length of the square input images. When omitted, the
            module-level ``image_size`` constant is used.
    """

    def __init__(self, input_size=None):
        super().__init__()
        if input_size is None:
            input_size = image_size
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        # Number of values per sample after conv2 and two 2x2 poolings.
        self.flat_features = 32 * (input_size // 4) * (input_size // 4)
        self.fc1 = nn.Linear(self.flat_features, 128)

    def forward(self, x):
        """Return one 128-dimensional feature row per input image.

        Args:
            x: Image batch of shape (N, 1, H, W), or (N, H, W), in which case
                the single channel axis is inserted automatically.

        Returns:
            Tensor of shape (N, 128).
        """
        if x.dim() == 3:
            # Conv2d needs an explicit channel axis.
            x = x.unsqueeze(1)
        x = F.max_pool2d(F.relu(self.conv1(x)), 2)
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        # Flatten per sample; unlike view(-1, k) this can never silently
        # change the batch dimension when the feature size is wrong.
        x = x.flatten(start_dim=1)
        x = F.relu(self.fc1(x))
        return x
# Define the GCN network
class GCN(nn.Module):
    """Two-layer graph convolutional network.

    Maps 128-dimensional node features to ``num_labels`` outputs per node
    through a 64-dimensional hidden layer with ReLU activation.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = GCNConv(128, 64)
        self.conv2 = GCNConv(64, num_labels)

    def forward(self, x, edge_index):
        """Apply both graph convolutions and return the per-node outputs."""
        hidden = F.relu(self.conv1(x, edge_index))
        return self.conv2(hidden, edge_index)
# Load node features and labels
# Paths are raw f-strings: in a normal string literal the "\U" in "C:\Users"
# starts a unicode escape and the file fails to compile with a SyntaxError.
# NOTE(review): torch.load() deserializes objects written by torch.save(); it
# does not decode PNG images. These files need an image reader (not available
# among this file's imports) or must be pre-converted to saved tensors.
node_features = []
for i in range(1, num_graphs + 1):
    graph_features = []
    for j in range(num_nodes):
        image_path = rf'C:\Users\jh\Desktop\data\input\images{i}.png_{j}.png'
        image = torch.load(image_path)
        graph_features.append(image)
    node_features.append(graph_features)

labels = []
for i in range(1, num_graphs + 1):
    graph_labels = []
    for j in range(num_nodes):
        label_path = rf'C:\Users\jh\Desktop\data\input\labels{i}_{j}.txt'
        with open(label_path, 'r') as f:
            label = [int(value) for value in f.readline().split()]
        # Fail early with a clear message instead of a ragged-tensor error
        # later when the label rows are converted to a tensor.
        if len(label) != num_labels:
            raise ValueError(
                f'{label_path}: expected {num_labels} label values, '
                f'found {len(label)}'
            )
        graph_labels.append(label)
    labels.append(graph_labels)
# Load edge relations
# Raw string: "\U" in a normal literal is an invalid unicode escape and makes
# the file fail to compile.
edge_index = []
edge_path = r'C:\Users\jh\Desktop\data\input\edges_L.csv'
with open(edge_path, 'r') as f:
    for line in f:
        line = line.strip()
        if not line:
            # Tolerate blank or trailing empty lines in the CSV.
            continue
        # First column is the source node, second the target node.
        src, tgt = (int(field) for field in line.split(',')[:2])
        # The edges are undirected, so store both directions.
        edge_index.append([src, tgt])
        edge_index.append([tgt, src])
# Create the PyG data objects
# Every graph shares the same topology, so the (2, num_directed_edges) index
# tensor is built once instead of once per graph.
edge_index_tensor = torch.tensor(edge_index, dtype=torch.long).t().contiguous()
data_list = []
for graph_features, graph_labels in zip(node_features, labels):
    # Convolution layers require floating-point input.
    x = torch.stack(graph_features, dim=0).float()
    y = torch.tensor(graph_labels, dtype=torch.float)
    data = Data(x=x, y=y, edge_index=edge_index_tensor)
    data_list.append(data)
# Define the model
class CNNGCN(nn.Module):
    """Run the CNN on the node images, then the GCN on the resulting features.

    nn.Sequential cannot be used here: its forward() accepts a single input,
    while the GCN stage also needs ``edge_index``.
    """

    def __init__(self):
        super().__init__()
        self.cnn = CNN()
        self.gcn = GCN()

    def forward(self, x, edge_index):
        """Return the per-node GCN outputs for one graph."""
        return self.gcn(self.cnn(x), edge_index)


model = CNNGCN()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# Training loop
num_train_nodes = 30  # first 30 nodes train, the remaining nodes validate
val_history = []  # mean validation loss per epoch, for monitoring only
model.train()
for epoch in range(100):
    epoch_val_loss = 0.0
    for data in data_list:
        optimizer.zero_grad()
        out = model(data.x, data.edge_index)
        # Optimise on the training nodes only; adding the validation nodes to
        # this loss would leak them into training.
        loss = F.mse_loss(out[:num_train_nodes], data.y[:num_train_nodes])
        loss.backward()
        optimizer.step()
        # Validation loss is computed on detached outputs so it never
        # contributes gradients.
        val_loss = F.mse_loss(
            out[num_train_nodes:].detach(), data.y[num_train_nodes:]
        )
        epoch_val_loss += val_loss.item()
    val_history.append(epoch_val_loss / max(len(data_list), 1))
# Run inference once per graph; gradients are not needed for reporting.
model.eval()
with torch.no_grad():
    outputs = [model(data.x, data.edge_index) for data in data_list]

# Print the predicted feature representations
for i, out in enumerate(outputs, start=1):
    print(f'Graph {i} Predicted Feature Representations:')
    print(out)
    print()

# Print the predicted label vectors
# The model is trained with an MSE loss against the label values, so each of
# the outputs per node is a regressed label. Round to the nearest integer and
# clamp to the valid class range 0..4. argmax over dim 1 would collapse each
# node to a single index instead of a label vector.
for i, out in enumerate(outputs, start=1):
    predicted_labels = out.round().clamp(0, 4).long()
    print(f'Graph {i} Predicted Label Vectors:')
    print(predicted_labels)
    print()
原文地址: https://www.cveoy.top/t/topic/pk59 著作权归作者所有。请勿转载和采集!