PyTorch CNN and GCN: Implementing a Hybrid Neural Network for Image Classification
This code implements a hybrid CNN-GCN network for image classification using PyTorch. It consists of two main parts: a CNN network for extracting image features and a GCN network for capturing relationships between those features.
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch_geometric.nn import GCNConv
from torch_geometric.data import Data
from torch_geometric.loader import DataLoader
from PIL import Image
import numpy as np
import pandas as pd
# Define the CNN network
class CNN(nn.Module):
    """Two-conv-layer CNN mapping a (N, 3, 40, 40) RGB batch to 8 logits.

    Each convolution is followed by ReLU and 2x2 max pooling, so a 40x40
    input becomes 32 channels of 10x10 before the fully connected head
    (fc1 expects exactly 32 * 10 * 10 flattened features).
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(32 * 10 * 10, 128)
        self.fc2 = nn.Linear(128, 8)

    def forward(self, x):
        """Return raw class logits of shape (N, 8) for input x of shape (N, 3, 40, 40)."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten per sample. x.view(x.size(0), -1) preserves the batch
        # dimension and fails loudly on a spatial-size mismatch, whereas
        # the previous x.view(-1, 32 * 10 * 10) would silently re-batch.
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        return self.fc2(x)
# Define the GCN network
class GCN(nn.Module):
    """Two-layer graph convolutional network: GCNConv -> ReLU -> GCNConv."""

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.conv1 = GCNConv(input_dim, hidden_dim)
        self.conv2 = GCNConv(hidden_dim, output_dim)

    def forward(self, x, edge_index):
        """Propagate node features x over the graph given by edge_index.

        Returns raw (un-activated) per-node scores of width output_dim.
        """
        hidden = F.relu(self.conv1(x, edge_index))
        return self.conv2(hidden, edge_index)
def load_data(root='C:/Users/jh/Desktop/data/input'):
    """Load sub-image arrays and their label vectors from disk.

    Parameters
    ----------
    root : str
        Base directory containing ``images/`` and ``labels/`` subfolders.
        Defaults to the original hard-coded location, so existing callers
        are unaffected.

    Returns
    -------
    tuple[np.ndarray, np.ndarray]
        ``features`` of shape (42 * 37, 3, H, W) in channel-first order,
        and ``labels`` holding the integers read from each label file.
    """
    features = []
    labels = []
    # 42 source images, each split into 37 sub-images -- presumed from the
    # on-disk naming scheme ({i}.png_{j}.png); TODO confirm against dataset.
    for i in range(1, 43):
        for j in range(37):
            image_path = f'{root}/images/{i}.png_{j}.png'
            image = Image.open(image_path).convert('RGB')  # force 3 channels
            image = np.array(image).transpose((2, 0, 1))   # HWC -> CHW
            features.append(image)
            label_path = f'{root}/labels/{i}_{j}.txt'
            with open(label_path, 'r') as file:
                labels.append([int(x) for x in file.readline().split()])
    return np.array(features), np.array(labels)
def load_edges(path='C:/Users/jh/Desktop/data/input/edges_L.csv'):
    """Read the edge-list CSV and return a (2, num_edges) long tensor.

    Parameters
    ----------
    path : str
        CSV file with one edge per row (source, target) and no header.
        Defaults to the original hard-coded location.

    Returns
    -------
    torch.Tensor
        Contiguous ``edge_index`` in the (2, E) layout GCNConv expects.
    """
    edges = pd.read_csv(path, header=None)
    return torch.tensor(edges.values, dtype=torch.long).t().contiguous()
def train_cnn(features, labels, num_train=38 * 37, epochs=10, lr=0.001):
    """Train a CNN classifier and report accuracy on the held-out tail.

    Parameters
    ----------
    features : np.ndarray
        Image array of shape (N, 3, H, W).
    labels : np.ndarray
        Integer class labels (CrossEntropyLoss targets).
    num_train : int
        Number of leading samples used for training; the remainder is the
        test split. Defaults to the original hard-coded 38 * 37.
    epochs : int
        Passes over the training set (default 10, as before).
    lr : float
        Adam learning rate (default 0.001, as before).

    Returns
    -------
    CNN
        The trained network.
    """
    cnn = CNN()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(cnn.parameters(), lr=lr)
    features = torch.tensor(features, dtype=torch.float32)
    labels = torch.tensor(labels, dtype=torch.long)  # CrossEntropyLoss needs long
    train_features, test_features = features[:num_train], features[num_train:]
    train_labels, test_labels = labels[:num_train], labels[num_train:]
    train_dataset = torch.utils.data.TensorDataset(train_features, train_labels)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
    cnn.train()
    for epoch in range(epochs):
        running_loss = 0.0
        # batch_labels avoids shadowing the outer `labels` tensor.
        for i, (inputs, batch_labels) in enumerate(train_loader):
            optimizer.zero_grad()
            loss = criterion(cnn(inputs), batch_labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            if i % 10 == 9:
                print(f'Epoch: {epoch+1}, Batch: {i+1}, Loss: {running_loss/10}')
                running_loss = 0.0
    # Evaluate in inference mode: eval() fixes train-time layers (cheap
    # insurance even though none exist yet) and no_grad() skips building an
    # unused autograd graph over the whole test set.
    if len(test_labels) > 0:  # guard against ZeroDivisionError on an empty split
        cnn.eval()
        with torch.no_grad():
            _, predicted = torch.max(cnn(test_features), 1)
        accuracy = (predicted == test_labels).sum().item() / len(test_labels)
        print(f'Test Accuracy: {accuracy}')
    return cnn
def train_gcn(features, labels, edge_index, num_train=38 * 37, epochs=10, lr=0.001):
    """Train the GCN for multi-label node classification and report accuracy.

    Parameters
    ----------
    features : np.ndarray
        Per-node feature vectors; width must match GCN input_dim (8).
    labels : np.ndarray
        Multi-label targets (one row of 0/1 indicators per node).
    edge_index : torch.Tensor
        (2, E) long tensor of graph edges.
    num_train : int
        Leading samples used for training; the rest are the test split.
        Defaults to the original hard-coded 38 * 37.
    epochs, lr :
        Training passes (default 10) and Adam learning rate (default 0.001).

    Returns
    -------
    GCN
        The trained network.
    """
    gcn = GCN(input_dim=8, hidden_dim=16, output_dim=8)
    criterion = nn.MultiLabelSoftMarginLoss()
    optimizer = optim.Adam(gcn.parameters(), lr=lr)
    features = torch.tensor(features, dtype=torch.float32)
    labels = torch.tensor(labels, dtype=torch.float32)  # soft-margin loss needs float
    train_features, test_features = features[:num_train], features[num_train:]
    train_labels, test_labels = labels[:num_train], labels[num_train:]
    # First half of the training nodes is optimized on; second half is a
    # validation set (val_mask is attached but not used during training).
    train_mask = torch.zeros(train_features.size(0), dtype=torch.bool)
    train_mask[:train_features.size(0) // 2] = True
    val_mask = torch.zeros(train_features.size(0), dtype=torch.bool)
    val_mask[train_features.size(0) // 2:] = True
    test_mask = torch.ones(test_features.size(0), dtype=torch.bool)
    data = Data(x=train_features, y=train_labels, edge_index=edge_index)
    data.train_mask = train_mask
    data.val_mask = val_mask
    data.test_mask = test_mask
    loader = DataLoader([data], batch_size=1)
    gcn.train()
    for epoch in range(epochs):
        running_loss = 0.0
        for batch in loader:
            optimizer.zero_grad()
            outputs = gcn(batch.x, batch.edge_index)
            loss = criterion(outputs[batch.train_mask], batch.y[batch.train_mask])
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        print(f'Epoch: {epoch+1}, Loss: {running_loss/len(loader)}')
    # NOTE(review): edge_index describes the full graph, but only the tail
    # nodes are passed here, so edge indices may not line up with these
    # rows -- confirm the edge CSV matches this node ordering.
    if test_features.size(0) > 0:  # guard against an empty test split
        gcn.eval()
        with torch.no_grad():  # inference only: skip autograd bookkeeping
            test_outputs = torch.sigmoid(gcn(test_features, edge_index))
        predictions = (test_outputs > 0.5).float()
        # Element-wise mean over all (node, label) pairs. The previous code
        # divided the element-wise match count by the number of nodes, which
        # could report "accuracy" greater than 1 for multi-label targets.
        accuracy = (predictions[test_mask] == test_labels[test_mask]).float().mean().item()
        print(f'Test Accuracy: {accuracy}')
    return gcn
# Load data
features, labels = load_data()
edge_index = load_edges()
# Train the CNN network on the raw images.
cnn = train_cnn(features, labels)
# Bug fix: CNN instances have no `.features` attribute, so the original
# `train_gcn(cnn.features, ...)` raised AttributeError. Instead, run the
# trained CNN once in inference mode to produce an 8-dim embedding per
# image, matching the GCN's input_dim=8.
cnn.eval()
with torch.no_grad():
    node_features = cnn(torch.tensor(features, dtype=torch.float32)).numpy()
# Train the GCN network on the CNN embeddings.
gcn = train_gcn(node_features, labels, edge_index)
The code first defines the CNN and GCN networks. The CNN network is a basic convolutional network with two convolutional layers, each followed by ReLU activation and 2x2 max pooling, and two fully connected layers. The GCN network is a simple graph convolutional network with two layers.
The code then loads the image data and labels, as well as the edge relationship data. The image data is converted to numpy arrays and then to PyTorch tensors. The edge relationship data is loaded from a CSV file and converted to a PyTorch tensor.
The code then trains the CNN network on the image data. The training is done using the torch.utils.data.DataLoader class to iterate over the data in batches. The loss function used is nn.CrossEntropyLoss(). The optimizer used is optim.Adam(). The trained CNN network is then used to extract features from the image data, which are then passed to the GCN network.
Finally, the code trains the GCN network on the extracted features. The training iterates over the graph data with torch_geometric's DataLoader (torch_geometric.loader.DataLoader). The loss function used is nn.MultiLabelSoftMarginLoss(). The optimizer used is optim.Adam(). The trained GCN network is then used to predict the labels of the images.
This code provides a basic example of how to implement a hybrid CNN-GCN network in PyTorch. The code can be extended to include more complex network architectures, different training strategies, and various image datasets.
原文地址: https://www.cveoy.top/t/topic/pfZY 著作权归作者所有。请勿转载和采集!