import cv2
import numpy as np
from PIL import Image
import torch
import torch.nn as nn
import torch.nn.functional as F
import time
import os
import torchvision.transforms as transforms
from tqdm import tqdm
import pandas as pd

class Sobel(nn.Module):
    """Fixed (non-trainable) Sobel edge detector returning per-pixel gradient magnitude.

    Input:  (N, 1, H, W) single-channel image batch.
    Output: (N, 1, H, W) tensor of sqrt(Gx^2 + Gy^2).
    """

    def __init__(self):
        super().__init__()
        self.filter = nn.Conv2d(in_channels=1, out_channels=2, kernel_size=3,
                                stride=1, padding=1, bias=False)
        # Horizontal and vertical gradient kernels, stacked as
        # (out_channels=2, in_channels=1, 3, 3).  They are 2x the textbook
        # Sobel kernels; the scale only affects magnitude, not edge locations.
        kernels = torch.tensor([
            [[2.0, 0.0, -2.0], [4.0, 0.0, -4.0], [2.0, 0.0, -2.0]],   # Gx
            [[2.0, 4.0, 2.0], [0.0, 0.0, 0.0], [-2.0, -4.0, -2.0]],   # Gy
        ]).unsqueeze(1)
        self.filter.weight = nn.Parameter(kernels, requires_grad=False)

    def forward(self, img):
        """Return the gradient magnitude of `img`."""
        grads = self.filter(img)
        return grads.mul(grads).sum(dim=1, keepdim=True).sqrt()
    
    
class AverageFilter(nn.Module):
    """Box-blur a single-channel image with a uniform ksize x ksize kernel.

    Bug fix: the original used stride=max((ksize-1)//2, 0) and a hard-coded
    padding of 1, so ksize=1 crashed (stride 0 is invalid for Conv2d) and any
    ksize other than 3 silently down-sampled / shrank the output.  The filter
    now always uses stride 1 with "same" padding (ksize//2, assumes odd ksize),
    so the output keeps the input's spatial size.  For ksize=3 — the only value
    used in this file — behavior is unchanged.

    Input:  (N, 1, H, W) tensor.
    Output: (N, 1, H, W) blurred tensor (non-trainable weights).
    """

    def __init__(self, ksize: int = 5):
        super().__init__()
        self.filter = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=ksize,
                                stride=1, padding=ksize // 2, bias=False)
        # Uniform averaging kernel, shape (out=1, in=1, ksize, ksize).
        G = torch.full((1, 1, ksize, ksize), 1.0 / (ksize ** 2))
        self.filter.weight = nn.Parameter(G, requires_grad=False)

    def forward(self, img):
        """Return the blurred image."""
        return self.filter(img)

class DataBuffer():
    """Fixed-capacity FIFO buffer; putting past capacity evicts the oldest item."""

    def __init__(self, num):
        # Coerce to int so float capacities behave consistently.
        self.num = int(num)
        self.queue = []

    def put(self, element):
        """Append `element`; if capacity is exceeded, drop and return the oldest entry."""
        self.queue.append(element)
        if len(self.queue) > self.num:
            return self.queue.pop(0)
        return None

    def get(self):
        """Return buffered items with any `None` placeholders filtered out."""
        return [item for item in self.queue if item is not None]

    def is_full(self):
        """True once the buffer holds at least `num` items."""
        return len(self.queue) >= self.num
        
def get_video_file_list(folder_path):
    """Return '/'-joined paths of video files directly inside `folder_path`.

    Extensions are matched case-insensitively via os.path.splitext.  The
    original sliced fixed-width suffixes (file_name[-4:] / file_name[-3:]),
    which missed upper-case names like 'A.MP4' and could mis-handle very
    short names; splitext fixes both while still accepting everything the
    old code accepted.
    """
    video_exts = {'.mp4', '.flv', '.mts', '.avi', '.ts'}
    video_paths = []
    for file_name in os.listdir(folder_path):
        ext = os.path.splitext(file_name)[1].lower()
        if ext in video_exts:
            # Join with '/' (not os.path.join): downstream code splits on '/'.
            video_paths.append(folder_path + '/' + file_name)
    return video_paths

if __name__ == '__main__':
    # Segment every video in `path` with a pre-trained UNet, keep the largest
    # predicted contour per file, and write each file's averaged mask center
    # point (plus the contour) to an Excel sheet.
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    trained_model_path = 'D:\\UC_data\\unet-main\\models\\unet_256_0.0001_1669203106.7905042.pt'
    # Fix: map_location lets a checkpoint saved on GPU load on a CPU-only machine.
    model = torch.load(trained_model_path, map_location=device)
    model = model.to(device)

    transform = transforms.Compose([
                transforms.ToTensor(),
                transforms.Resize((256, 256)),
                transforms.Normalize([0.5], [0.5])
            ])

    blur = AverageFilter(3).to(device)
    mask_gray_th = 0.5  # threshold for binarizing the blurred mask
    step = 3            # inference runs every step*2 frames (after down-sampling)
    path = 'D:\\UC_data\\20221102\\20230703'
    video_paths = get_video_file_list(path)

    data = pd.DataFrame(columns=['文件名', '中心点', '轮廓'])

    # Read each video
    for idx, video_path in enumerate(video_paths):
        print('=============================','[{}/{}]'.format(idx+1, len(video_paths)),video_path)
        cap = cv2.VideoCapture(video_path)
        h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        down_sample_ratio = 2
        maxf = max(int(cap.get(cv2.CAP_PROP_FRAME_COUNT)), 1)
        max_counter = []  # [largest contour] from the most recent detection

        min_frame = 0
        # NOTE(review): maxf+1 iterates past the last frame; cap.read() then
        # returns ret=False and the frame is skipped, so this is harmless.
        max_frame = maxf + 1

        file_type = video_path.split('/')[-1].split('.')[-1]
        file_name = video_path.split('/')[-1][:-len(file_type)-1]

        fbuff = DataBuffer(16)  # model input tensors; the model consumes 16 at a time
        # Left edge (column) of the h-wide square crop taken from each frame.
        crop_lower = round(h * 17/36)

        count = 0
        cap.set(cv2.CAP_PROP_POS_FRAMES, min_frame)

        shape = 0          # point count of the largest contour seen so far
        center_x = 0       # accumulated bbox-center coordinates, averaged later
        center_y = 0
        max_contours = []  # fix: initialize so videos with no detection don't NameError
        # ===========================
        # Detection and Segmentation
        # ===========================
        for frame_idx in tqdm(range(min_frame, max_frame+1)):
            ret, frame = cap.read()
            if frame_idx % down_sample_ratio != 0: # decrease fps from 60 to 30
                continue

            if ret:
                # Grayscale, then crop an h x h square starting at column crop_lower.
                input_image = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)[:,crop_lower:crop_lower+h]
                input_image = Image.fromarray(input_image)
                try:
                    input_image = transform(input_image).type(torch.FloatTensor).to(device)
                except Exception:  # fix: was a bare except (also caught KeyboardInterrupt)
                    print('============================= ',video_path)
                    print(h, int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
                    exit()

                fbuff.put(input_image)
                if not fbuff.is_full():
                    continue
                if (frame_idx%max(step*2, 1))==0:
                    # Stack the 16 buffered (1, 256, 256) crops into one batch.
                    inputs = torch.cat(fbuff.queue, dim=0).unsqueeze(0)
                    pred_mask = model(inputs).detach()

                    pred_mask = F.interpolate(pred_mask, (h, h))
                    pred_mask = blur(pred_mask).squeeze()
                    pred_mask = (pred_mask > mask_gray_th).int()
                    pred_mask *= 255
                    pred_mask = pred_mask.cpu().numpy().astype('uint8')
                    contours, hierarchy = cv2.findContours(pred_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
                    if len(contours) > 0:
                        contours = list(contours)
                        contours.sort(key=lambda c: cv2.contourArea(c), reverse=True)
                        # Shift x back into full-frame coordinates (the crop
                        # removed the first crop_lower columns).
                        contours[0][:,:,0] += crop_lower
                        max_counter = [contours[0]]
                    # Fix: the original indexed max_counter[0] unconditionally and
                    # raised IndexError if no contour had been found yet.
                    if max_counter:
                        if (max_counter[0]).shape[0] > shape:
                            shape = (max_counter[0]).shape[0]
                            max_contours = max_counter

                        the_contours = np.array(max_counter).squeeze()

                        # Accumulate the bounding-box center for later averaging.
                        left_top = np.array([min(the_contours[:,0]), min(the_contours[:,1])])
                        right_bottom = np.array([max(the_contours[:,0]), max(the_contours[:,1])])
                        difference_x = right_bottom[0] - left_top[0]
                        difference_y = right_bottom[1] - left_top[1]
                        center_x += left_top[0]+(difference_x/2)
                        center_y += left_top[1]+(difference_y/2)
                        count += 1
        if count > 0:  # fix: avoid ZeroDivisionError when nothing was detected
            center_x = int(center_x/count)
            center_y = int(center_y/count)
        # Fix: DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
        # pd.concat is the supported (and long-available) replacement.
        row = pd.DataFrame([{'文件名':file_name, '中心点':(center_x,center_y), '轮廓':max_contours}])
        data = pd.concat([data, row], ignore_index=True)

    output_path = 'D:\\UC_data\\static analysis\\center_point.xlsx'
    data.to_excel(output_path, index=False)
    print('Finish')
# Extract the center point and contour of video frames with OpenCV and Python,
# and save the results to Excel.
# Source: https://www.cveoy.top/t/topic/fDLP (rights belong to the original author).