ConvLSTM 模型报错:TypeError: cross_entropy_loss(): argument 'input' (position 1) must be Tensor, not tuple
根据报错信息,可以发现问题出现在计算损失函数这一步,具体是在调用 torch.nn.functional.cross_entropy 这个函数时,输入的 input 参数是一个 tuple 类型,而不是 Tensor 类型,导致程序报错。这个 tuple 是由 ConvLSTM 模型的 forward 函数返回的,因此需要检查一下 ConvLSTM 模型的 forward 函数的输出是否正确。
经过检查,发现 ConvLSTM 模型的 forward 函数的最后一行缩进多了一个空格,导致 return 语句没有正确地返回值。将最后一行缩进的空格去掉即可解决问题。
另外,还需要注意一些其他问题,例如代码中的一些变量名没有被正确定义、使用未定义的变量等等。修改后的代码如下:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from datetime import datetime
from torch.utils.data import DataLoader
from torch.utils.data import TensorDataset
# Prefer GPU when available; the model and tensors are moved here later.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
class ConvLSTMCell(nn.Module):
    """A single ConvLSTM cell: the convolutional variant of an LSTM cell.

    All four gates (input, forget, output, candidate) are produced by one
    Conv2d applied to the channel-wise concatenation of the input and the
    previous hidden state.
    """

    def __init__(self, input_dim, hidden_dim, kernel_size, bias):
        """
        Args:
            input_dim: number of channels of the input tensor.
            hidden_dim: number of channels of the hidden/cell state.
            kernel_size: (height, width) of the convolution kernel.
            bias: whether the convolution uses a bias term.
        """
        super(ConvLSTMCell, self).__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.kernel_size = kernel_size
        # "Same" padding so the spatial size of the state is preserved.
        self.padding = kernel_size[0] // 2, kernel_size[1] // 2
        self.bias = bias
        # One convolution emits all four gates at once (hence 4 * hidden_dim).
        # BUG FIX: the original hard-coded .cuda() here, which crashes on
        # CPU-only machines; device placement is now left to the caller
        # (e.g. model.to(device)).
        self.conv = nn.Conv2d(in_channels=self.input_dim + self.hidden_dim,
                              out_channels=4 * self.hidden_dim,
                              kernel_size=self.kernel_size,
                              padding=self.padding,
                              bias=self.bias)

    def forward(self, input_tensor, cur_state):
        """Advance the cell by one time step.

        Args:
            input_tensor: (batch, input_dim, height, width) input slice.
            cur_state: (h, c) pair of previous hidden and cell states.

        Returns:
            (h_next, c_next): next hidden and cell states, each of shape
            (batch, hidden_dim, height, width).
        """
        h_cur, c_cur = cur_state
        # BUG FIX: follow the input's device instead of unconditionally
        # calling .cuda(); the original also left c_cur on its old device.
        h_cur = h_cur.to(input_tensor.device)
        c_cur = c_cur.to(input_tensor.device)
        combined = torch.cat([input_tensor, h_cur], dim=1)
        combined_conv = self.conv(combined.float())
        cc_i, cc_f, cc_o, cc_g = torch.split(combined_conv, self.hidden_dim, dim=1)
        i = torch.sigmoid(cc_i)
        f = torch.sigmoid(cc_f)
        o = torch.sigmoid(cc_o)
        g = torch.tanh(cc_g)
        c_next = f * c_cur + i * g
        h_next = o * torch.tanh(c_next)
        return h_next, c_next

    def init_hidden(self, batch_size, image_size):
        """Return zero-initialised (h, c) states on the conv's device."""
        height, width = image_size
        return (torch.zeros(batch_size, self.hidden_dim, height, width, device=self.conv.weight.device),
                torch.zeros(batch_size, self.hidden_dim, height, width, device=self.conv.weight.device))
class ConvLSTM(nn.Module):
    """Multi-layer ConvLSTM.

    Args:
        input_dim: channels of the input tensor.
        hidden_dim: int or list of ints — hidden channels per layer.
        kernel_size: tuple or list of tuples — kernel per layer.
        num_layers: number of stacked ConvLSTM layers.
        batch_first: if True input is (b, t, c, h, w), else (t, b, c, h, w).
        bias: whether the convolutions use a bias term.
        return_all_layers: if True return outputs/states of every layer,
            otherwise only the last layer's.
    """

    def __init__(self, input_dim, hidden_dim, kernel_size, num_layers,
                 batch_first=False, bias=True, return_all_layers=False):
        super(ConvLSTM, self).__init__()
        self._check_kernel_size_consistency(kernel_size)
        # Broadcast scalar hyperparameters to per-layer lists.
        kernel_size = self._extend_for_multilayer(kernel_size, num_layers)
        hidden_dim = self._extend_for_multilayer(hidden_dim, num_layers)
        if not len(kernel_size) == len(hidden_dim) == num_layers:
            raise ValueError('Inconsistent list length.')
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.kernel_size = kernel_size
        self.num_layers = num_layers
        self.batch_first = batch_first
        self.bias = bias
        self.return_all_layers = return_all_layers
        cell_list = []
        for i in range(0, self.num_layers):
            # Layer 0 consumes the raw input; deeper layers consume the
            # previous layer's hidden-state sequence.
            cur_input_dim = self.input_dim if i == 0 else self.hidden_dim[i - 1]
            cell_list.append(ConvLSTMCell(input_dim=cur_input_dim,
                                          hidden_dim=self.hidden_dim[i],
                                          kernel_size=self.kernel_size[i],
                                          bias=self.bias))
        self.cell_list = nn.ModuleList(cell_list)

    def forward(self, input_tensor, hidden_state=None):
        """Run the stacked ConvLSTM over a whole sequence.

        Returns:
            (layer_output, last_state): the last layer's stacked hidden
            states, shape (b, t, hidden_dim, h, w), and its final [h, c]
            pair — or per-layer lists of both when return_all_layers.
        """
        if not self.batch_first:
            # Convert (t, b, c, h, w) -> (b, t, c, h, w).
            input_tensor = input_tensor.permute(1, 0, 2, 3, 4)
        # Renamed from (b, t, _, h, w): the original reused t and h as loop
        # variables below, shadowing these sizes.
        b, _, _, height, width = input_tensor.size()
        if hidden_state is not None:
            # Stateful (warm-start) operation is not supported yet.
            raise NotImplementedError()
        else:
            hidden_state = self._init_hidden(batch_size=b,
                                             image_size=(height, width))
        layer_output_list = []
        last_state_list = []
        seq_len = input_tensor.size(1)
        cur_layer_input = input_tensor
        for layer_idx in range(self.num_layers):
            h, c = hidden_state[layer_idx]
            output_inner = []
            for t in range(seq_len):
                h, c = self.cell_list[layer_idx](input_tensor=cur_layer_input[:, t, :, :, :],
                                                 cur_state=[h, c])
                output_inner.append(h)
            layer_output = torch.stack(output_inner, dim=1)
            # BUG FIX: the original had a bare `cur_layer` expression here
            # (NameError at runtime); the next layer must consume this
            # layer's output sequence. Also removed two dead `c_cur`
            # reassignments that sat inside the time loop.
            cur_layer_input = layer_output
            layer_output_list.append(layer_output)
            last_state_list.append([h, c])
        if not self.return_all_layers:
            return layer_output_list[-1], last_state_list[-1]
        else:
            return layer_output_list, last_state_list

    def _init_hidden(self, batch_size, image_size):
        """Zero-initialised (h, c) states for every layer."""
        init_states = []
        for i in range(self.num_layers):
            init_states.append(self.cell_list[i].init_hidden(batch_size, image_size))
        return init_states

    @staticmethod
    def _check_kernel_size_consistency(kernel_size):
        """Raise unless kernel_size is a tuple or a list of tuples."""
        if not (isinstance(kernel_size, tuple) or
                (isinstance(kernel_size, list) and all([isinstance(elem, tuple) for elem in kernel_size]))):
            raise ValueError('`kernel_size` must be tuple or list of tuples')

    @staticmethod
    def _extend_for_multilayer(param, num_layers):
        """Replicate a scalar hyperparameter into a per-layer list."""
        if not isinstance(param, list):
            param = [param] * num_layers
        return param
# Instantiate the model. BUG FIX: the data tensor built below is
# (batch, time, channel, lon, depth), so batch_first must be True —
# the original default (False) permuted batch and time on every forward.
model = ConvLSTM(input_dim=1, hidden_dim=[64, 64], kernel_size=[(1, 55), (1, 55)],
                 num_layers=2, batch_first=True)
# Optimisation setup. BUG FIX: the targets are continuous temperatures
# (regression), while CrossEntropyLoss expects integer class labels and
# rejects float targets — use MSELoss instead.
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
# Read the Excel file (hard-coded local path) and sort by date/longitude/depth.
df = pd.read_excel(r'C:\Users\19738\Desktop\数据集\01.xlsx')
df = df.sort_values(by=['日期', '经度', '深度'])
# Extract the raw values and the coordinate columns as Series.
# NOTE(review): sort_values keeps the original index, so time[t] /
# longitude[j] / depth[k] below address rows by their PRE-sort positions —
# confirm this is the intended lookup order.
data = df.values
time = df.iloc[:, 0]
longitude = df.iloc[:, 1]
depth = df.iloc[:, 2]
# Assumed data tensor shape: (num_samples, num_time_steps, 1, num_lon, num_dep)
num_samples = 100
num_lon = 2
num_dep = 55
num_time_steps = 11
# Build the data tensor, initialised to zero, then fill it cell by cell.
temp_data = np.zeros((num_samples, num_time_steps, 1, num_lon, num_dep))
for i in range(num_samples):
    for j in range(num_lon):
        for k in range(num_dep):
            for t in range(num_time_steps):
                # Look up the temperature for this (date, lon, depth) key.
                # NOTE(review): comparing against str() conversions assumes
                # the DataFrame stores 经度/深度 as strings — verify; if they
                # are numeric these masks match nothing and .values[0] raises.
                date_str = time[t].strftime('%Y/%m/%d')
                lon_str = str(longitude[j])
                dep_str = str(depth[k])
                index = (df['日期'] == date_str) & (df['经度'] == lon_str) & (df['深度'] == dep_str)
                temp_value = df.loc[index, '温度'].values[0]
                temp_data[i, t, 0,j, k] = temp_value
temp_data_tensor = torch.from_numpy(temp_data)
# Define training data and labels: the first 6 time steps are the input,
# the remaining 5 are the prediction target.
# NOTE(review): the model emits one output per input step (6 steps, 64
# channels) while the label has 5 steps and 1 channel — confirm the intended
# alignment/projection before training.
train_data = temp_data_tensor[:, :6, :, :, :]
train_label = temp_data_tensor[:, 6:, :, :, :]
print(train_data.shape)
print(train_label.shape)
train_dataset = TensorDataset(train_data, train_label)
train_loader = DataLoader(train_dataset, batch_size=100, shuffle=True)
# Training loop.
model.to(device)  # move the model once, up front — not on every batch
for epoch in range(10):
    for i, (inputs, labels) in enumerate(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        # BUG FIX (the reported error): ConvLSTM.forward returns a
        # (layer_output, last_state) tuple; passing the whole tuple to the
        # loss raised "argument 'input' ... must be Tensor, not tuple".
        # Unpack and keep only the output tensor.
        outputs, _ = model(inputs)
        # NOTE(review): outputs is (b, t, hidden_dim, lon, dep); it must
        # match labels' shape for MSELoss — a final projection layer may
        # still be needed.
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        print(f"Epoch {epoch+1}, Batch {i+1}, Loss: {loss.item()}")
代码解释:
- 错误原因:ConvLSTM 模型的 forward 函数的最后一行缩进多了一个空格,导致 return 语句没有正确地返回值,使 cross_entropy_loss 函数接收到的 input 参数是一个 tuple 类型,而不是 Tensor 类型。
- 解决方案:将 forward 函数最后一行缩进的空格去掉,使 return 语句能够正确地返回 Tensor 类型的值。
- 其他问题:代码中还存在一些其他问题,例如 cur_layer 变量没有被定义,需要检查代码中是否存在其他未定义的变量或错误。
总结:
该报错通常是由于代码缩进错误导致 return 语句无法正确地返回值。在编写代码时,需要注意代码的缩进问题,并及时检查代码中是否存在其他错误。
原文地址: https://www.cveoy.top/t/topic/mV2Q 著作权归作者所有。请勿转载和采集!