Yolov5修改模型训练宽高为自定义宽高|Zyh

本次修改基于 YOLOv5 框架。下文给出的大多是修改后的代码，可通过搜索其中未改动的行（或被注释掉的原始代码）在源码中定位到对应位置。

一、修改train.py文件

# 第一处修改：
    parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='train, val image size (pixels)')
    parser.add_argument('--rect', action='store_true', help='rectangular training')
# 修改为
    # The default size is the list [512, 864]; the dataloader changes in this guide read it as [height, width].
    # argparse applies `type` only to command-line strings and string defaults, so this list default is used
    # as-is, while an explicit `--imgsz N` on the command line still yields a single int.
    parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=[512,864], help='train, val image size (pixels)')
    # default=True on a store_true flag means opt.rect is True whether or not --rect is passed.
    parser.add_argument('--rect', action='store_true', default=True, help='rectangular training')

# 第二处修改：
    # change start 2025/01/07: support separate height/width
    # hyp['obj'] *= (imgsz / 640) ** 2 * 3 / nl  # scale to image size and layers

    # When imgsz is a list, its largest entry stands in for the single image size in the hyp['obj'] scaling.
    if isinstance(imgsz, list):
        hyp['obj'] *= (max(imgsz) / 640) ** 2 * 3. / nl
    else:
        hyp['obj'] *= (imgsz / 640) ** 2 * 3 / nl  # scale to image size and layers
    # change end

# 第三处修改：
        # Update mosaic border (optional)
        # change start 2025/01/07: support separate height/width
        # The border is only re-randomized when imgsz is a single int; for a list imgsz this step is skipped
        # and dataset.mosaic_border is left unchanged.
        if isinstance(imgsz, int):
            b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs)
            dataset.mosaic_border = [b - imgsz, -b]  # height, width borders
        # change end

# 第四处修改（一般未启用）：
                # change start 2025/01/07: support separate height/width
                # sz = random.randrange(imgsz * 0.5, imgsz * 1.5 + gs) // gs * gs  # size

                # Draw the multi-scale size from 0.5x-1.5x of the image size (largest entry for a list imgsz),
                # then floor it to a multiple of gs. Both bounds are cast to int in both branches because
                # random.randrange() no longer accepts float arguments (deprecated in 3.10, TypeError in 3.12).
                # NOTE(review): assumes gs is an int — confirm where gs is defined.
                if isinstance(imgsz, int):
                    sz = random.randrange(int(imgsz * 0.5), int(imgsz * 1.5) + gs) // gs * gs  # size
                else:
                    sz = random.randrange(int(max(imgsz) * 0.5), int(max(imgsz) * 1.5) + gs) // gs * gs  # size
                # change end

二、修改 ./utils/loggers/__init__.py 文件

                if ni == 0 and self.tb and not self.opt.sync_bn:
                    # change start 2025/01/08: support separate height/width
                    # Build the 2-tuple passed to log_tensorboard_graph: an int size is duplicated,
                    # otherwise the first two entries of opt.imgsz are taken in order.
                    if isinstance(self.opt.imgsz, int):
                        n_imgsz=(self.opt.imgsz, self.opt.imgsz)
                    else:
                        n_imgsz=(self.opt.imgsz[0], self.opt.imgsz[1])
                    # change end
                    log_tensorboard_graph(self.tb, model, imgsz=n_imgsz)

三、修改 ./utils/dataloaders.py文件

# 第一处修改：
        # change start 2025/01/07: support separate height/width
        # self.mosaic = self.augment and not self.rect  # load 4 images at a time into a mosaic (only during training)
        # self.mosaic_border = [-img_size // 2, -img_size // 2]

        # Mosaic is enabled only when augmenting and rect is off.
        self.mosaic = self.augment and not self.rect  # load 4 images at a time into a mosaic (only during training)
        # The border is minus half the image size per axis: one value repeated for an int size,
        # or [-img_size[0] // 2, -img_size[1] // 2] for a list size.
        if isinstance(img_size, int):
            self.mosaic_border = [-img_size // 2, -img_size // 2]
        else:
            self.mosaic_border = [-img_size[0] // 2, -img_size[1] // 2]
        # change end

# 第二处修改：
            # change start 2025/01/07: support separate height/width
            # r = self.img_size / max(h0, w0)  # ratio
            # if r != 1:  # if sizes are not equal
            #    interp = cv2.INTER_LINEAR if (self.augment or r > 1) else cv2.INTER_AREA
            #    im = cv2.resize(im, (int(w0 * r), int(h0 * r)), interpolation=interp)

            if isinstance(self.img_size, int):
                # Single size: scale so the longer side equals img_size, keeping the aspect ratio.
                r = self.img_size / max(h0, w0)  # ratio
                if r != 1:  # if sizes are not equal
                    # The result is written back to `im`, as in the original line above.
                    im = cv2.resize(im, (int(w0 * r), int(h0 * r)),
                                    interpolation=cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR)
            else:
                # List size: this branch pairs with the isinstance check (not with `if r != 1`), so a list
                # img_size is always resized straight to the target shape; the aspect ratio is not preserved.
                im = cv2.resize(im, (self.img_size[1], self.img_size[0]),
                                interpolation=cv2.INTER_AREA)  # cv2 dsize is (width, height)
            # change end

# 第三处修改：
        # change start 2025/01/07: support separate height/width
        # s = self.img_size
        # yc, xc = (int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border)  # mosaic center x, y

        if isinstance(self.img_size, int):
            s = self.img_size
            yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border]  # mosaic center x, y
        else:
            # List size: unpack as (s_h, s_w). Each border entry is paired with the matching size entry via
            # zip, so yc is drawn against the first entry and xc against the second.
            # The `s` inside the comprehension is local to it; no variable `s` is bound in this branch.
            s_h, s_w = self.img_size
            yc, xc = [int(random.uniform(-x, 2 * s + x)) for x, s in
                      zip(self.mosaic_border, self.img_size)]  # mosaic center x, y
        # change end

# 第四处修改：
            # change start 2025/01/07 支持宽高
            # place img in img4
            # if i == 0:  # top left
            #     img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles
            #     x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc  # xmin, ymin, xmax, ymax (large image)
            #     x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h  # xmin, ymin, xmax, ymax (small image)
            # elif i == 1:  # top right
            #     x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
            #     x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
            # elif i == 2:  # bottom left
            #     x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
            #     x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
            # elif i == 3:  # bottom right
            #     x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
            #     x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)

            # Place tile i on the mosaic canvas img4. The *a coordinates index the canvas (large image),
            # the *b coordinates index the tile being pasted (small image).
            if isinstance(self.img_size, int):
                # Int size: the canvas is (2s, 2s) and both axes are clamped to s * 2.
                # place img in img4
                if i == 0:  # top left
                    img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles
                    x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc  # xmin, ymin, xmax, ymax (large image)
                    x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h  # xmin, ymin, xmax, ymax (small image)
                elif i == 1:  # top right
                    x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
                    x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
                elif i == 2:  # bottom left
                    x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
                    x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
                elif i == 3:  # bottom right
                    x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
                    x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
            else:
                # List size: the canvas is (2 * s_h, 2 * s_w); x extents are clamped to s_w * 2 and
                # y extents to s_h * 2. Otherwise the placement matches the int branch.
                # place img in img4
                if i == 0:  # top left
                    img4 = np.full((s_h * 2, s_w * 2, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles
                    x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc  # xmin, ymin, xmax, ymax (large image)
                    x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h  # xmin, ymin, xmax, ymax (small image)
                elif i == 1:  # top right
                    x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s_w * 2), yc
                    x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
                elif i == 2:  # bottom left
                    x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s_h * 2, yc + h)
                    x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
                elif i == 3:  # bottom right
                    x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s_w * 2), min(s_h * 2, yc + h)
                    x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
            # change end

# 第五处修改：
        # change start 2025/01/07: support separate height/width
        # for x in (labels4[:, 1:], *segments4):
        #     np.clip(x, 0, 2 * s, out=x)  # clip when using random_perspective()

        for x in (labels4[:, 1:], *segments4):
            if isinstance(self.img_size, int):
                np.clip(x, 0, 2 * s, out=x)  # clip when using random_perspective()
            else:
                # List size: even columns are clipped to 2 * s_w and odd columns to 2 * s_h, in place.
                # NOTE(review): presumably even columns hold x and odd columns hold y coordinates — confirm.
                np.clip(x[:, 0::2], 0, 2 * s_w, out=x[:, 0::2])  # clip when using random_perspective()
                np.clip(x[:, 1::2], 0, 2 * s_h, out=x[:, 1::2])  # clip when using random_perspective()
        # change end

四、修改 ./utils/augmentations.py：如果所用的数据增强（增广）引用了图像尺寸参数（如下例中的 size），也需要同样区分高和宽，例如：

import albumentations as A
            print('CurrentVersion => ', A.__version__)
            check_version(A.__version__, '1.0.3', hard=True)  # version requirement

            # change start 2025/01/07: support separate height/width
            # Split size into crop height/width: an int is used for both, otherwise size[0] becomes the
            # height and size[1] the width.
            if isinstance(size, int):
                sizeH = size
                sizeW = size
            else:
                sizeH = size[0]
                sizeW = size[1]
            # change end

            # The leading figures quoted from the author's notes below (e.g. "0.5-0.8") are presumably
            # suggested ranges for p — TODO confirm.
            T = [
                # (note: 0.5-0.8) Random crop and resize with probability 0.0; height/width set the output size,
                # scale=(0.8, 1.0) bounds the crop area fraction, ratio=(0.9, 1.11) bounds the crop aspect ratio.
                A.RandomResizedCrop(height=sizeH, width=sizeW, scale=(0.8, 1.0), ratio=(0.9, 1.11), p=0.0),
                # (note: 0.1-0.3) Blur with probability 0.01.
                A.Blur(p=0.01),
                # (note: 0.1-0.3) Median blur with probability 0.01.
                A.MedianBlur(p=0.01),
                # (note: 0.1) Convert to grayscale with probability 0.01.
                A.ToGray(p=0.01),
                # (note: 0.3-0.5) Contrast-limited adaptive histogram equalization with probability 0.01.
                A.CLAHE(p=0.01),
                # (note: 0.5) Random brightness/contrast adjustment with probability 0.0.
                A.RandomBrightnessContrast(p=0.0),
                # Random gamma adjustment with probability 0.0.
                A.RandomGamma(p=0.0),
                # Image compression with probability 0.0; quality_lower=75 sets the lower quality bound.
                A.ImageCompression(quality_lower=75, p=0.0)]  # transforms
            self.transform = A.Compose(T, bbox_params=A.BboxParams(format='yolo', label_fields=['class_labels']))

            # Log only the transforms whose probability p is non-zero.
            LOGGER.info(prefix + ', '.join(f'{x}'.replace('always_apply=False, ', '') for x in T if x.p))