利用Yolov5实现旋转框目标检测（数据处理部分）

2022年4月23日 751点热度 3人点赞 3条评论

HBB转OBB格式

本文是第一篇目标检测系列文章，主要用于介绍Yolov5在旋转框目标检测的数据处理，巩固学习！！！
用到的数据是Airbus ship detection dataset，其余可以以此为例。

用到的代码

数据处理

（1）加载Airbus ship数据处理代码，只需获取mask_to_rotationbb.py即可，然后对代码做一些修改。

import os
import cv2
from tqdm import tqdm
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from skimage.io import imread
import matplotlib.pyplot as plt
from skimage.segmentation import mark_boundaries
from skimage.measure import label, regionprops
#ship_dir = ''
train_image_dir = "./Target_detection/kaggle-airbus-ship-detection/train/"
output_image_dir = "./Target_detection/kaggle-airbus-ship-detection/labels_rotate/"

from skimage.morphology import label
def multi_rle_encode(img):
    labels = label(img[:, :, 0])
    return [rle_encode(labels==k) for k in np.unique(labels[labels>0])]

def rle_encode(img):
    '''
    img: numpy array, 1 - mask, 0 - background
    Returns run length as string formated
    '''
    pixels = img.T.flatten()
    pixels = np.concatenate([[0], pixels, [0]])
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    runs[1::2] -= runs[::2]
    return ' '.join(str(x) for x in runs)

def rle_decode(mask_rle, shape=(768, 768)):
    '''
    mask_rle: run-length as string formated (start length)
    shape: (height,width) of array to return 
    Returns numpy array, 1 - mask, 0 - background
    '''
    s = mask_rle.split()
    starts, lengths = [np.asarray(x, dtype=int) for x in (s[0:][::2], s[1:][::2])]
    starts -= 1
    ends = starts + lengths
    img = np.zeros(shape[0]*shape[1], dtype=np.uint8)
    for lo, hi in zip(starts, ends):
        img[lo:hi] = 1
    return img.reshape(shape).T  # Needed to align to RLE direction

def masks_as_image(in_mask_list, all_masks=None):
    # Take the individual ship masks and create a single mask array for all ships
    if all_masks is None:
        all_masks = np.zeros((768, 768), dtype = np.int16)
    #if isinstance(in_mask_list, list):
    for mask in in_mask_list:
        if isinstance(mask, str):
            all_masks += rle_decode(mask)
    return np.expand_dims(all_masks, -1)

masks = pd.read_csv("./Target_detection/kaggle-airbus-ship-detection/train_ship_segmentations.csv")
print(masks.shape[0], 'masks found')
print(masks['ImageId'].value_counts().shape[0])
print(masks.head())

images_with_ship = masks.ImageId[masks.EncodedPixels.isnull()==False]
images_with_ship = np.unique(images_with_ship.values)
print('There are ' +str(len(images_with_ship)) + ' image files with masks')

plotme = 1
image_size = 768
for i in tqdm(range(0, 10)):
    image = images_with_ship[i]

    if plotme == 1:
        fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize = (15, 5))
    img_0 = cv2.imread(train_image_dir+'/' + image)
    rle_0 = masks.query('ImageId=="'+image+'"')['EncodedPixels']
    mask_0 = masks_as_image(rle_0)
    #
    # 
    lbl_0, lbl_cnt = label(mask_0, return_num=True) 
    #props = regionprops(lbl_0)
    img_1 = img_0.copy()
    #print ('Image', image, lbl_cnt)
    for i in range(1, lbl_cnt+1):
        mask = np.array((lbl_0 == i).astype('uint8')[..., 0])
        mask = cv2.resize(mask, (768, 768))
        cnts, hierarchy = cv2.findContours((255*mask).astype('uint8'), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
        rect = cv2.minAreaRect(cnts[0])
        '''
        这是一个大坑，网上说opencv 4.5之前和4.5之后的角度定义不一样，因此注释的是4.5之前的表示，
        4.5之后的表示仅需一行代码即可。
        '''
        '''
        if rect[1][1]>rect[1][0]:
            angle = 90-rect[2]
        else:
            angle = -rect[2]
        '''
        angle = -rect[2]
        if rect[1][0]>rect[1][1]:
            angle = 90-rect[2]
        else:
            angle = -(90-rect[2])

        box = cv2.boxPoints(rect)
        #print (box)
        box = np.int0(box)

        if plotme == 1:
            cv2.drawContours(img_1,[box],0,(0,191,255),2)
        x = int(rect[0][0])
        y = int(rect[0][1])
        #print (rect, angle, 360*((props[i-1].orientation + np.pi/2)/(2*np.pi)), x, y)
        if plotme == 1:
            cv2.circle(img_1, ( x, y ), 5, (255, 0, 0), 3)
        print(str(rect[0][0]/image_size) + ' ' + str(rect[0][1]/image_size) + ' ' + str(rect[1][0]/image_size) + ' ' + str(rect[1][1]) + ' ' + '1' + ' ' + str(angle/90) + '\n' )
    '''
    if plotme == 1:
        ax1.imshow(img_0)
        ax1.set_title('Image')
        ax2.set_title('Mask')
        ax3.set_title('Image with derived bounding box')
        ax2.imshow(mask_0[...,0], cmap='gray')
        ax3.imshow(img_1)
        plt.show()
    '''
plotme = 0
rot_bboxes_dict = {}

for i in tqdm(range(0, len(images_with_ship))):

    image = images_with_ship[i]
    '''
    这里为每一张影像建立一个txt文件,保存格式为：类别, x, y, w, h, theta；均为归一化后的值
    举个例子：
    0 0.44062503178914386 0.08958334724108379 0.006987712035576503 0.004658474586904049 0.7048327975802952 
    0 0.5630836486816406 0.2586206793785095 0.0159581924478213 0.035785041749477386 0.7577621459960937 
    '''
    out_file = open(output_image_dir+'/' + image[:-4] + '.txt', 'w')    

    if plotme == 1:
        fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize = (15, 5))
    img_0 = cv2.imread(train_image_dir+'/' + image)
    rle_0 = masks.query('ImageId=="'+image+'"')['EncodedPixels']
    mask_0 = masks_as_image(rle_0)
    #
    # 
    lbl_0, lbl_cnt = label(mask_0, return_num=True) 
    #props = regionprops(lbl_0)
    img_1 = img_0.copy()
    bboxes = []
    bboxes1 = []
    for i in range(1, lbl_cnt+1):
        mask = np.array((lbl_0 == i).astype('uint8')[..., 0])
        mask = cv2.resize(mask, (768, 768))
        cnts, hierarchy = cv2.findContours((255*mask).astype('uint8'), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
        rect = cv2.minAreaRect(cnts[0])
        '''
        这是一个大坑，网上说opencv 4.5之前和4.5之后的角度定义不一样，因此注释的是4.5之前的表示，
        4.5之后的表示仅需一行代码即可。
        '''
        '''
        if rect[1][1]>rect[1][0]:
            angle = 90-rect[2]
        else:
            angle = -rect[2]
        '''
        angle = rect[2]
        box = cv2.boxPoints(rect)
        #print (box)
        box = np.int0(box)

        if plotme == 1:
            cv2.drawContours(img_1,[box],0,(0,191,255),2)
        x = int(rect[0][0])
        y = int(rect[0][1])
        if plotme == 1:
            cv2.circle(img_1, ( x, y ), 5, (255, 0, 0), 3)
        bboxes.append([rect[0][0]/image_size, rect[0][1]/image_size, rect[1][0]/image_size, rect[1][1]/image_size, angle/90])
        bboxes1.append([0, rect[0][0]/image_size, rect[0][1]/image_size, rect[1][0]/image_size, rect[1][1]/image_size, angle/90])
    #print (bboxes1)

    rot_bboxes_dict[image] = bboxes.copy()
    #print (rot_bboxes_dict)
    for k, data in enumerate(bboxes1):
        #print (k, data)
        #print (type(data))
        for j in data:
            #print ('j',j)
            out_file.write(str(j)+ ' ')
        out_file.write('\n')

    '''
    if plotme == 1:
        ax1.imshow(img_0)
        ax1.set_title('Image')
        ax2.set_title('Mask')
        ax3.set_title('Image with derived bounding box')
        ax2.imshow(mask_0[...,0], cmap='gray')
        ax3.imshow(img_1)
        plt.show()
    '''
bboxes_df = pd.DataFrame([rot_bboxes_dict])
bboxes_df = bboxes_df.transpose()
bboxes_df.columns = ['bbox_list']
print(bboxes_df.head())

(2) 以上代码运行后，会得到txt形式的文件夹，如图所示：