本次修改基于 YOLOv5 框架。下文所列大多为修改后的代码;可在源文件中搜索其中未改动的部分(或被注释掉的原始代码),以定位对应的修改位置。
一、修改train.py文件
# 第一处修改:
parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='train, val image size (pixels)')
parser.add_argument('--rect', action='store_true', help='rectangular training')
# 修改为
# NOTE(review): `type=int` is kept from upstream, so a value given on the command line is
# still parsed as a single integer; the two-element list can only come from this default.
# Other snippets index the list as img_size[0] = height, img_size[1] = width.
parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=[512,864], help='train, val image size (pixels)')
# NOTE(review): with action='store_true' and default=True the option is always True and
# cannot be switched off from the command line.
parser.add_argument('--rect', action='store_true', default=True, help='rectangular training')
# 第二处修改:
# change start 2025/01/07 支持宽高
# hyp['obj'] *= (imgsz / 640) ** 2 * 3 / nl # scale to image size and layers
# imgsz is either a single int (square input) or a height/width pair. The objectness
# loss gain is scaled by the longer side so both forms go through one formula.
# Tuples are accepted as well as lists; previously a tuple fell into the scalar branch
# and failed on `imgsz / 640`.
longest_side = max(imgsz) if isinstance(imgsz, (list, tuple)) else imgsz
hyp['obj'] *= (longest_side / 640) ** 2 * 3 / nl  # scale to image size and layers
# change end
# 第三处修改:
# Update mosaic border (optional)
# change start 2025/01/07 支持宽高
# The random mosaic border is only re-drawn for a scalar (square) image size; for a
# height/width pair this snippet leaves dataset.mosaic_border untouched.
if isinstance(imgsz, int):
    low, high = 0.25 * imgsz, 0.75 * imgsz + gs
    b = int(random.uniform(low, high) // gs * gs)  # random offset floored to a multiple of gs
    dataset.mosaic_border = [b - imgsz, -b]  # height, width borders
# change end
# 第四处修改(一般未启用):
# change start 2025/01/07 支持宽高
# sz = random.randrange(imgsz * 0.5, imgsz * 1.5 + gs) // gs * gs # size
# Multi-scale: draw a random size from [0.5 * side, 1.5 * side + gs), floored to a
# multiple of gs, where side is imgsz itself or the longer side of a height/width pair.
# random.randrange() needs integer arguments (floats raise TypeError on Python 3.12+),
# so both bounds are converted with int() for the scalar case as well.
base = imgsz if isinstance(imgsz, int) else max(imgsz)
sz = random.randrange(int(base * 0.5), int(base * 1.5) + gs) // gs * gs  # size
# change end
二、修改 ./loggers/__init__.py文件
if ni == 0 and self.tb and not self.opt.sync_bn:
    # change start 2025/01/08: support separate height/width
    # Always hand log_tensorboard_graph a 2-tuple: a scalar size is duplicated,
    # a sequence contributes its first two entries.
    opt_imgsz = self.opt.imgsz
    n_imgsz = (opt_imgsz, opt_imgsz) if isinstance(opt_imgsz, int) else (opt_imgsz[0], opt_imgsz[1])
    # change end
    log_tensorboard_graph(self.tb, model, imgsz=n_imgsz)
三、修改 ./utils/dataloaders.py文件
# 第一处修改:
# change start 2025/01/07 支持宽高
# self.mosaic = self.augment and not self.rect # load 4 images at a time into a mosaic (only during training)
# self.mosaic_border = [-img_size // 2, -img_size // 2]
# Mosaic (4 images loaded at a time) is enabled only when augmenting and not in rect mode.
self.mosaic = self.augment and not self.rect
# A scalar img_size gives the same border on both axes; for a pair, entry 0 and entry 1
# each produce their own border, stored in that order.
if isinstance(img_size, int):
    border_0 = border_1 = -img_size // 2
else:
    border_0, border_1 = -img_size[0] // 2, -img_size[1] // 2
self.mosaic_border = [border_0, border_1]
# change end
# 第二处修改:
# change start 2025/01/07 支持宽高
# r = self.img_size / max(h0, w0) # ratio
# if r != 1: # if sizes are not equal
# interp = cv2.INTER_LINEAR if (self.augment or r > 1) else cv2.INTER_AREA
# im = cv2.resize(im, (int(w0 * r), int(h0 * r)), interpolation=interp)
# The resized image is written back to `im`, the variable it was read from (as in the
# commented-out upstream code); assigning to a new name `img` left `im` unresized.
if isinstance(self.img_size, int):
    # Scalar size: scale so the longer side matches img_size, keeping the aspect ratio.
    r = self.img_size / max(h0, w0)  # ratio
    if r != 1:  # if sizes are not equal
        # INTER_AREA only when shrinking without augmentation, INTER_LINEAR otherwise
        interp = cv2.INTER_LINEAR if (self.augment or r > 1) else cv2.INTER_AREA
        im = cv2.resize(im, (int(w0 * r), int(h0 * r)), interpolation=interp)
else:
    # Height/width pair: resize straight to the target shape (aspect ratio is not kept).
    # cv2.resize expects dsize as (width, height), hence the swapped indices.
    im = cv2.resize(im, (self.img_size[1], self.img_size[0]), interpolation=cv2.INTER_AREA)
# change end
# 第三处修改:
# change start 2025/01/07 支持宽高
# s = self.img_size
# yc, xc = (int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border) # mosaic center x, y
# Pick the mosaic centre (yc, xc): each coordinate is drawn uniformly from
# [-border, 2 * extent + border) using the matching mosaic_border entry.
if isinstance(self.img_size, int):
    s = self.img_size
    extents = (s, s)
else:
    s_h, s_w = self.img_size
    extents = (s_h, s_w)
yc, xc = (int(random.uniform(-border, 2 * extent + border))
          for border, extent in zip(self.mosaic_border, extents))  # mosaic center y, x
# change end
# 第四处修改:
# change start 2025/01/07 支持宽高
# place img in img4
# if i == 0: # top left
# img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles
# x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image)
# x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image)
# elif i == 1: # top right
# x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
# x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
# elif i == 2: # bottom left
# x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
# x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
# elif i == 3: # bottom right
# x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
# x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
# Size of the 2x2 mosaic canvas: square (2 * s) for a scalar img_size,
# (2 * s_h, 2 * s_w) for a height/width pair.
if isinstance(self.img_size, int):
    canvas_h = canvas_w = s * 2
else:
    canvas_h, canvas_w = s_h * 2, s_w * 2

# place img in img4
# (x1a, y1a, x2a, y2a) is the target region in the large image,
# (x1b, y1b, x2b, y2b) the matching source region in the small image.
if i == 0:  # top left
    img4 = np.full((canvas_h, canvas_w, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles
    x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc  # xmin, ymin, xmax, ymax (large image)
    x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h  # xmin, ymin, xmax, ymax (small image)
elif i == 1:  # top right
    x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, canvas_w), yc
    x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
elif i == 2:  # bottom left
    x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(canvas_h, yc + h)
    x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
elif i == 3:  # bottom right
    x1a, y1a, x2a, y2a = xc, yc, min(xc + w, canvas_w), min(canvas_h, yc + h)
    x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
# change end
# 第五处修改:
# change start 2025/01/07 支持宽高
# for x in (labels4[:, 1:], *segments4):
# np.clip(x, 0, 2 * s, out=x) # clip when using random_perspective()
# clip when using random_perspective()
is_square = isinstance(self.img_size, int)
for x in (labels4[:, 1:], *segments4):
    if is_square:
        np.clip(x, 0, 2 * s, out=x)
    else:
        # even columns are limited by the mosaic width, odd columns by the mosaic height
        np.clip(x[:, 0::2], 0, 2 * s_w, out=x[:, 0::2])
        np.clip(x[:, 1::2], 0, 2 * s_h, out=x[:, 1::2])
# change end
四、修改 ./utils/augmentations.py:如果所使用的数据增强(增广)引用了 imgsz(size)参数,也需要同步支持宽高,例如:
import albumentations as A
print('CurrentVersion => ', A.__version__)
check_version(A.__version__, '1.0.3', hard=True)  # version requirement
# change start 2025/01/07: support separate height/width
# A scalar size is used for both sides; otherwise size[0] is the height and size[1] the width.
sizeH, sizeW = (size, size) if isinstance(size, int) else (size[0], size[1])
# change end
# The leading numbers in the notes below (e.g. 0.5-0.8) are carried over from the original
# comments; presumably suggested probability ranges - TODO confirm.
T = [
    # 0.5-0.8: random crop then resize to (sizeH, sizeW); scale is the crop-area fraction
    # range, ratio the crop aspect-ratio range. Currently p=0.0.
    A.RandomResizedCrop(height=sizeH, width=sizeW, scale=(0.8, 1.0), ratio=(0.9, 1.11), p=0.0),
    # 0.1-0.3: blur, applied with probability 0.01.
    A.Blur(p=0.01),
    # 0.1-0.3: median blur, applied with probability 0.01.
    A.MedianBlur(p=0.01),
    # 0.1: convert to grayscale, applied with probability 0.01.
    A.ToGray(p=0.01),
    # 0.3-0.5: contrast-limited adaptive histogram equalization, applied with probability 0.01.
    A.CLAHE(p=0.01),
    # 0.5: random brightness/contrast adjustment. Currently p=0.0.
    A.RandomBrightnessContrast(p=0.0),
    # Random gamma adjustment. Currently p=0.0.
    A.RandomGamma(p=0.0),
    # Image compression with a lower quality bound of 75. Currently p=0.0.
    A.ImageCompression(quality_lower=75, p=0.0),
]  # transforms
self.transform = A.Compose(T, bbox_params=A.BboxParams(format='yolo', label_fields=['class_labels']))
# Log only the transforms whose probability is non-zero.
enabled = ', '.join(f'{x}'.replace('always_apply=False, ', '') for x in T if x.p)
LOGGER.info(prefix + enabled)