Building a Transformer Model for Tabular Data with PyTorch: A Comprehensive Guide with Code
Here's a comprehensive guide on how to build a Transformer model for tabular data using PyTorch, complete with a code example.
Defining the Transformer Model
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
import pandas as pd
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
# Define the Transformer Model
class TransformerModel(nn.Module):
    """Transformer encoder over the columns of a tabular row.

    Each scalar feature is turned into its own ``hidden_size``-dimensional
    token, the encoder attends across the tokens of a single row, the
    encoded tokens are mean-pooled and a small feed-forward head produces
    the prediction.

    Args:
        input_size: Number of feature columns in each row.
        output_size: Number of values predicted per row.
        num_layers: Number of stacked encoder layers.
        hidden_size: Token embedding width (the encoder's ``d_model``) and
            width of the hidden layer in the prediction head.
        dropout: Dropout probability used in the encoder and the head.
        nhead: Number of attention heads; must divide ``hidden_size``.

    Raises:
        ValueError: If ``hidden_size`` is not divisible by ``nhead``.
    """

    def __init__(self, input_size, output_size, num_layers, hidden_size,
                 dropout, nhead=8):
        super().__init__()
        # Multi-head attention splits d_model evenly across heads. Tying
        # d_model to the number of columns (e.g. 10 columns with 8 heads)
        # fails, so the embedding width is used as d_model instead.
        if hidden_size % nhead != 0:
            raise ValueError(
                'hidden_size ({}) must be divisible by nhead ({})'.format(
                    hidden_size, nhead))
        self.input_size = input_size
        self.output_size = output_size
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.dropout = dropout
        self.nhead = nhead
        # One learned (weight, bias) pair per column: scalar -> token.
        self.feature_weight = nn.Parameter(
            torch.empty(input_size, hidden_size))
        self.feature_bias = nn.Parameter(
            torch.zeros(input_size, hidden_size))
        nn.init.xavier_uniform_(self.feature_weight)
        # batch_first=True so the input layout is (batch, tokens, d_model).
        self.transformer_encoder_layer = nn.TransformerEncoderLayer(
            d_model=hidden_size, nhead=nhead, dropout=dropout,
            batch_first=True)
        self.transformer_encoder = nn.TransformerEncoder(
            self.transformer_encoder_layer, num_layers=num_layers)
        self.fc = nn.Linear(hidden_size, hidden_size)
        self.relu = nn.ReLU()
        self.dropout_layer = nn.Dropout(dropout)
        self.output = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return predictions of shape ``(batch, output_size)``.

        Args:
            x: Float tensor of shape ``(batch, input_size)``.

        Raises:
            ValueError: If ``x`` is not 2-D with ``input_size`` columns.
        """
        # Explicit check: a (batch, 1) input would otherwise broadcast
        # silently against the per-column embedding parameters.
        if x.dim() != 2 or x.size(-1) != self.input_size:
            raise ValueError(
                'expected input of shape (batch, {}), got {}'.format(
                    self.input_size, tuple(x.shape)))
        # (batch, input_size, 1) * (input_size, hidden_size)
        #   -> (batch, input_size, hidden_size)
        tokens = x.unsqueeze(-1) * self.feature_weight + self.feature_bias
        encoded = self.transformer_encoder(tokens)
        # Collapse the token axis so every row yields one vector.
        pooled = encoded.mean(dim=1)
        hidden = self.dropout_layer(self.relu(self.fc(pooled)))
        return self.output(hidden)
Defining the Tabular Dataset
# Define the Dataset
class TabularDataset(Dataset):
    """Dataset backed by a CSV file whose last column is the target.

    All columns except the last are treated as features. Both parts are
    converted to ``float32`` arrays when the file is loaded, so a
    non-numeric column raises at construction time instead of in the
    middle of a training epoch.

    Args:
        data_path: Path (or any source accepted by ``pandas.read_csv``)
            of the CSV file to load.
    """

    def __init__(self, data_path):
        self.data = pd.read_csv(data_path)
        # Convert once up front; __getitem__ then only indexes.
        self.features = self.data.iloc[:, :-1].to_numpy(dtype='float32')
        self.target = self.data.iloc[:, -1].to_numpy(dtype='float32')

    def __getitem__(self, index):
        """Return the ``(features, target)`` float tensors for one row."""
        x = self.features[index]
        y = self.target[index]
        return (torch.tensor(x, dtype=torch.float),
                torch.tensor(y, dtype=torch.float))

    def __len__(self):
        """Return the number of rows in the CSV file."""
        return len(self.data)
Training and Testing Functions
# Define the training function
def train(model, device, train_loader, optimizer, criterion, epoch):
    """Run one training epoch and return the mean loss per sample.

    Args:
        model: Module being optimized.
        device: Device that each batch is moved to.
        train_loader: Iterable of ``(inputs, target)`` batches exposing a
            ``dataset`` attribute (used for progress output).
        optimizer: Optimizer stepping ``model``'s parameters.
        criterion: Loss callable taking ``(output, target)``.
        epoch: Epoch number, used only in the progress output.

    Returns:
        The sample-weighted mean loss over the epoch, or ``nan`` when the
        loader yields no samples.
    """
    model.train()
    # A 'sum'-reduced criterion already returns a per-batch total.
    sum_reduced = getattr(criterion, 'reduction', 'mean') == 'sum'
    total_loss = 0.0
    num_samples = 0
    for batch_idx, (inputs, target) in enumerate(train_loader):
        inputs, target = inputs.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(inputs)
        # A (batch,) target against a (batch, 1) output would broadcast
        # to (batch, batch) inside the loss; align shapes when the
        # element counts agree. Class-index targets are left untouched.
        if target.shape != output.shape and target.numel() == output.numel():
            target = target.reshape(output.shape)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        batch_size = len(inputs)
        # Accumulate as a detached tensor to avoid a host sync per batch.
        batch_loss = loss.detach()
        total_loss = total_loss + (
            batch_loss if sum_reduced else batch_loss * batch_size)
        num_samples += batch_size
        if batch_idx % 10 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)] Loss: {:.6f}'.format(
                epoch, batch_idx * len(inputs), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
    if num_samples == 0:
        return float('nan')
    return float(total_loss) / num_samples
# Define the test function
def test(model, device, test_loader, criterion):
model.eval()
test_loss = 0
correct = 0
with torch.no_grad():
for data, target in test_loader:
data, target = data.to(device), target.to(device)
output = model(data)
test_loss += criterion(output, target).item() # sum up batch loss
test_loss /= len(test_loader.dataset)
print('Test set: Average loss: {:.4f}
'.format(test_loss))
Setting up the Training Process
# Set the hyperparameters
output_size = 1
num_layers = 4
hidden_size = 128
dropout = 0.2
lr = 0.001
epochs = 10
batch_size = 32
# Load the data
train_path = 'train.csv'
test_path = 'test.csv'
train_dataset = TabularDataset(train_path)
test_dataset = TabularDataset(test_path)
# Take the feature count from the loaded data rather than hard-coding it,
# so the model always matches the number of feature columns in the CSV.
input_size = train_dataset.features.shape[1]
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
# Evaluation does not benefit from shuffling; keep the order stable.
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
# Set up the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Create the model and optimizer
model = TransformerModel(input_size, output_size, num_layers, hidden_size, dropout).to(device)
optimizer = optim.Adam(model.parameters(), lr=lr)
criterion = nn.MSELoss()
# Train the model, evaluating on the test set after every epoch
for epoch in range(1, epochs + 1):
    train(model, device, train_loader, optimizer, criterion, epoch)
    test(model, device, test_loader, criterion)
Explanation
- Model Definition: The `TransformerModel` class uses PyTorch's `nn.TransformerEncoderLayer` and `nn.TransformerEncoder` to construct a transformer architecture. This architecture is designed to handle sequential data and learn relationships between input features.
- Dataset Definition: The `TabularDataset` class loads tabular data from a CSV file, splits it into features and target variables, and prepares it for use with PyTorch's `DataLoader`.
- Training and Testing: The `train` and `test` functions provide the logic for training and evaluating the model. During training, the model learns to minimize the loss between its predictions and the actual target values. During testing, the model's performance is assessed on unseen data.
Key Points
- This code provides a basic framework for building a Transformer model for tabular data. You can customize it to suit your specific dataset and task.
- Consider exploring different hyperparameter settings (e.g., number of layers, hidden size, dropout) to optimize the model's performance.
- You can further enhance the model by incorporating techniques like early stopping, regularization, and more sophisticated data preprocessing.
- This example focuses on regression; you can adapt it for classification tasks by modifying the output layer and loss function.
原文地址: https://www.cveoy.top/t/topic/nMJB 著作权归作者所有。请勿转载和采集!