PyTorch 特征提取示例：文本和图像

本文提供两个基于 PyTorch 的特征提取示例代码：一个是使用预训练词向量进行文本数据特征提取，另一个是利用预训练卷积神经网络（CNN）提取图像特征。

1. 文本数据的词嵌入特征提取（使用预训练的词向量）

import torch
from torchtext.vocab import GloVe
import torch.nn as nn

# Select the pretrained word-vector model and its dimensionality.
# The vectors are fetched/cached by torchtext on first use.
embedding = GloVe(name='6B', dim=100)

# Example text data.
sentences = [
    'This is the first sentence.',
    'This sentence is the second sentence.',
    'And this is the third one.',
    'Is this the first sentence?'
]

# Row 0 of the local embedding matrix is reserved for padding so sentences of
# different lengths can be batched into a single rectangular tensor.
PAD_TOKEN = '<pad>'
PAD_IDX = 0


def tokenize(sentence):
    """Return the lowercase tokens of *sentence* without edge punctuation.

    GloVe 6B is an uncased vocabulary, so raw tokens such as 'This' or
    'sentence.' would never be found; normalising here makes the vocabulary
    lookup and the input encoding agree with each other.
    """
    stripped = (word.strip('.,!?;:"\'()') for word in sentence.lower().split())
    # Drop tokens that consisted solely of punctuation.
    return [word for word in stripped if word]


# Tokenise once and reuse the result for both the vocabulary and the inputs.
tokenized = [tokenize(sentence) for sentence in sentences]

# Build the local vocabulary. Sorting gives a reproducible row order (set
# iteration order is not stable across runs); the padding token comes first.
vocab = [PAD_TOKEN] + sorted({word for tokens in tokenized for word in tokens})
word_to_idx = {word: idx for idx, word in enumerate(vocab)}

# Build the local embedding matrix: one row per vocabulary entry.
# Padding and out-of-vocabulary words keep an all-zero vector.
embedding_matrix = torch.zeros(len(vocab), embedding.vectors.size(1))
for i, word in enumerate(vocab):
    if i != PAD_IDX and word in embedding.stoi:
        embedding_matrix[i] = embedding[word]


# Define the model.
class EmbeddingModel(nn.Module):
    """Frozen embedding lookup backed by a pretrained weight matrix.

    Args:
        weights: 2-D float tensor with one row per vocabulary index. Defaults
            to the module-level ``embedding_matrix``.
    """

    def __init__(self, weights=None):
        super(EmbeddingModel, self).__init__()
        if weights is None:
            weights = embedding_matrix
        self.embedding = nn.Embedding.from_pretrained(
            weights, freeze=True, padding_idx=PAD_IDX
        )

    def forward(self, inputs):
        """Map a tensor of row indices to their embedding vectors."""
        return self.embedding(inputs)


# Initialise the model.
model = EmbeddingModel()

# Convert the text to embedding features. Indices must refer to rows of the
# local matrix (word_to_idx), not to GloVe's global ids, and every sentence is
# right-padded with PAD_IDX up to the longest sentence.
max_len = max((len(tokens) for tokens in tokenized), default=0)
inputs = torch.tensor(
    [
        [word_to_idx[word] for word in tokens] + [PAD_IDX] * (max_len - len(tokens))
        for tokens in tokenized
    ],
    dtype=torch.long,
)
features = model(inputs)

# Print the feature tensor: (num_sentences, max_len, embedding_dim).
print(features)

2. 图像数据的预训练卷积神经网络（CNN）特征提取

import torch
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image

# Initialise the pretrained model (ResNet18 is used as the example).
# NOTE(review): recent torchvision releases deprecate `pretrained=True` in
# favour of `weights=models.ResNet18_Weights.DEFAULT`; switch once the minimum
# supported torchvision version allows it.
model = models.resnet18(pretrained=True)
# Replace the classification head with an identity so the forward pass yields
# the pooled 512-dimensional feature vector instead of the 1000 class logits.
model.fc = torch.nn.Identity()
# Evaluation mode: batch-norm layers use their running statistics.
model = model.eval()

# Example image and the standard ImageNet preprocessing pipeline.
image_path = 'path_to_your_image.jpg'
image_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Load and preprocess the image. The context manager closes the file handle,
# and converting to RGB guarantees three channels for grayscale/RGBA/palette
# inputs, which the normalisation and the first convolution both require.
with Image.open(image_path) as img:
    image = img.convert('RGB')
image = image_transform(image)
# Add the batch dimension: (3, 224, 224) -> (1, 3, 224, 224).
image = image.unsqueeze(0)

# Extract the image features; no gradients are needed for inference.
with torch.no_grad():
    features = model(image)

# Print the feature vector, shape (1, 512).
print(features)

请确保已安装 PyTorch、torchtext、torchvision 和 Pillow 库（首次运行时会自动下载 GloVe 词向量和 ResNet18 预训练权重，需要网络连接），并根据您的具体数据和需求进行适当的调整。这些示例代码可以用于基于 PyTorch 的特征提取任务。