diff --git a/data/augmentations.py b/data/augmentations.py old mode 100644 new mode 100755 diff --git a/data/coco_dataset.py b/data/coco_dataset.py old mode 100644 new mode 100755 index 68827d2..751c5d8 --- a/data/coco_dataset.py +++ b/data/coco_dataset.py @@ -7,9 +7,10 @@ import os import cv2 import numpy as np -from data.augmentations import Augmentations, CopyPaste, CutOut -from data.process_box import x1y1_to_xcyc, x1y1wh_to_xyxy, xyxy_to_x1y1wh, normalize_bboxes, resize_bboxes, adjust_bboxes - +from augmentations import Augmentations, CopyPaste, CutOut +from process_box import x1y1_to_xcyc, x1y1wh_to_xyxy, xyxy_to_x1y1wh, normalize_bboxes, resize_bboxes, adjust_bboxes +import imgaug.augmenters as iaa +from imgaug.augmentables.bbs import BoundingBoxesOnImage class CustomDataset(Dataset): @@ -20,7 +21,9 @@ def __init__(self, image_path, annotation_path, image_size=640, normalize=True, self.image_root = os.path.join('/', *image_path.split('/')[:-1]) self.image_path = image_path self.annotation_path = annotation_path - self.normalize = normalize + self.normalize = False + import json + annot = json.load(open(annotation_path)) self.coco = COCO(self.annotation_path) self.image_paths = sorted(os.listdir(self.image_path)) self.ids = sorted(list(self.coco.imgs.keys())) @@ -57,7 +60,10 @@ def __getitem__(self, index): img, ratio = self.load_image(image_id) labels = self.load_labels(image_id=image_id, ratio=ratio) - + bboxes = np.array(img_data['labels'][:,:4]) + bboxes_iaa = BoundingBoxesOnImage([], img.shape).from_xyxy_array(bboxes, img.shape) + image_after = bboxes_iaa.draw_on_image(img, size=2) + cv2.imwrite('./labelled/image_' + str(index) + '.jpg', image_after) if self.augmentations: img_data = {'img': img, 'labels':labels} diff --git a/data/process_box.py b/data/process_box.py old mode 100644 new mode 100755 diff --git a/data/yolo_to_coco.py b/data/yolo_to_coco.py old mode 100644 new mode 100755 diff --git a/models/detector.py b/models/detector.py old mode 100644 new mode 100755 diff --git a/models/loss.py b/models/loss.py old mode 100644 new mode 100755 diff --git a/models/model.py b/models/model.py old mode 100644 new mode 100755 diff --git a/models/model_bn.py b/models/model_bn.py old mode 100644 new mode 100755 diff --git a/models/utils.py b/models/utils.py old mode 100644 new mode 100755 index ec2f23d..22997c0 --- a/models/utils.py +++ b/models/utils.py @@ -3,6 +3,8 @@ import torch import math import numpy as np +import cv2 +from typing import Union def init_weights(model): @@ -31,4 +33,174 @@ def variance_scaling_(tensor, gain=1.): fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor) std = math.sqrt(gain / float(fan_in)) - return _no_grad_normal_(tensor, 0., std) \ No newline at end of file + return _no_grad_normal_(tensor, 0., std) + +class Swish(nn.Module): + def forward(self, x): + return x * torch.sigmoid(x) + +import torch.optim as optim +import math + +def get_optimizer(opt, model): + if opt['optimizer'] == 'adam': + return optim.Adam(model.parameters(), lr=opt['learning_rate']) + elif opt['optimizer'] == 'sgd': + return optim.SGD(model.parameters(), lr=opt['learning_rate'], momentum=0.9) + elif opt['optimizer'] == 'asgd': + return optim.ASGD(model.parameters(), lr=opt['learning_rate']) + + +def get_scheduler(opt, optimizer, len_dataset): + + if opt['lr_scheduler'] == 'multistep_lr': + return optim.lr_scheduler.MultiStepLR(optimizer, milestones=[int(opt['epochs']*0.7), int(opt['epochs']*0.9)], gamma=0.1) + elif opt['lr_scheduler'] == 'cosine': + lf = lambda x: (((1 + 
math.cos(x * math.pi / opt['epochs'])) / 2) ** 1.0) * 0.95 + 0.05 + return optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lf) + + elif opt['lr_scheduler'] == 'cosine_annealing': + return optim.lr_scheduler.CosineAnnealingLR(optimizer, len_dataset, eta_min=0) + +class BBoxTransform(nn.Module): + def forward(self, anchors, regression): + """ + decode_box_outputs adapted from https://github.com/google/automl/blob/master/efficientdet/anchors.py + + Args: + anchors: [batchsize, boxes, (y1, x1, y2, x2)] + regression: [batchsize, boxes, (dy, dx, dh, dw)] + + Returns: + + """ + y_centers_a = (anchors[..., 0] + anchors[..., 2]) / 2 + x_centers_a = (anchors[..., 1] + anchors[..., 3]) / 2 + ha = anchors[..., 2] - anchors[..., 0] + wa = anchors[..., 3] - anchors[..., 1] + + w = regression[..., 3].exp() * wa + h = regression[..., 2].exp() * ha + + y_centers = regression[..., 0] * ha + y_centers_a + x_centers = regression[..., 1] * wa + x_centers_a + + ymin = y_centers - h / 2. + xmin = x_centers - w / 2. + ymax = y_centers + h / 2. + xmax = x_centers + w / 2. + + return torch.stack([xmin, ymin, xmax, ymax], dim=2) + +class ClipBoxes(nn.Module): + + def __init__(self): + super(ClipBoxes, self).__init__() + + def forward(self, boxes, img): + batch_size, num_channels, height, width = img.shape + + boxes[:, :, 0] = torch.clamp(boxes[:, :, 0], min=0) + boxes[:, :, 1] = torch.clamp(boxes[:, :, 1], min=0) + + boxes[:, :, 2] = torch.clamp(boxes[:, :, 2], max=width - 1) + boxes[:, :, 3] = torch.clamp(boxes[:, :, 3], max=height - 1) + + return boxes + +def invert_affine(metas: Union[float, list, tuple], preds): + for i in range(len(preds)): + if len(preds[i]['rois']) == 0: + continue + else: + if metas is float: + preds[i]['rois'][:, [0, 2]] = preds[i]['rois'][:, [0, 2]] / metas + preds[i]['rois'][:, [1, 3]] = preds[i]['rois'][:, [1, 3]] / metas + else: + new_w, new_h, old_w, old_h, padding_w, padding_h = metas[i] + preds[i]['rois'][:, [0, 2]] = preds[i]['rois'][:, [0, 2]] / (new_w / old_w) + preds[i]['rois'][:, [1, 3]] = preds[i]['rois'][:, [1, 3]] / (new_h / old_h) + return preds + + +def preprocess(*image_path, max_size=512, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)): + ori_imgs = [cv2.imread(img_path) for img_path in image_path] + normalized_imgs = [(img[..., ::-1] / 255 - mean) / std for img in ori_imgs] + imgs_meta = [aspectaware_resize_padding(img, max_size, max_size, + means=None) for img in normalized_imgs] + framed_imgs = [img_meta[0] for img_meta in imgs_meta] + framed_metas = [img_meta[1:] for img_meta in imgs_meta] + + return ori_imgs, framed_imgs, framed_metas + +def aspectaware_resize_padding(image, width, height, interpolation=None, means=None): + old_h, old_w, c = image.shape + if old_w > old_h: + new_w = width + new_h = int(width / old_w * old_h) + else: + new_w = int(height / old_h * old_w) + new_h = height + + canvas = np.zeros((height, height, c), np.float32) + if means is not None: + canvas[...] 
= means + + if new_w != old_w or new_h != old_h: + if interpolation is None: + image = cv2.resize(image, (new_w, new_h)) + else: + image = cv2.resize(image, (new_w, new_h), interpolation=interpolation) + + padding_h = height - new_h + padding_w = width - new_w + + if c > 1: + canvas[:new_h, :new_w] = image + else: + if len(image.shape) == 2: + canvas[:new_h, :new_w, 0] = image + else: + canvas[:new_h, :new_w] = image + + return canvas, new_w, new_h, old_w, old_h, padding_w, padding_h, + +def postprocess(x, anchors, regression, classification, regressBoxes, clipBoxes, threshold, iou_threshold): + transformed_anchors = regressBoxes(anchors, regression) + transformed_anchors = clipBoxes(transformed_anchors, x) + scores = torch.max(classification, dim=2, keepdim=True)[0] + scores_over_thresh = (scores > threshold)[:, :, 0] + out = [] + for i in range(x.shape[0]): + if scores_over_thresh[i].sum() == 0: + out.append({ + 'rois': np.array(()), + 'class_ids': np.array(()), + 'scores': np.array(()), + }) + continue + + classification_per = classification[i, scores_over_thresh[i, :], ...].permute(1, 0) + transformed_anchors_per = transformed_anchors[i, scores_over_thresh[i, :], ...] + scores_per = scores[i, scores_over_thresh[i, :], ...] + scores_, classes_ = classification_per.max(dim=0) + anchors_nms_idx = batched_nms(transformed_anchors_per, scores_per[:, 0], classes_, iou_threshold=iou_threshold) + + if anchors_nms_idx.shape[0] != 0: + classes_ = classes_[anchors_nms_idx] + scores_ = scores_[anchors_nms_idx] + boxes_ = transformed_anchors_per[anchors_nms_idx, :] + + out.append({ + 'rois': boxes_.cpu().numpy(), + 'class_ids': classes_.cpu().numpy(), + 'scores': scores_.cpu().numpy(), + }) + else: + out.append({ + 'rois': np.array(()), + 'class_ids': np.array(()), + 'scores': np.array(()), + }) + + return out diff --git a/my_model/model.py b/my_model/model.py new file mode 100755 index 0000000..c7b5971 --- /dev/null +++ b/my_model/model.py @@ -0,0 +1,549 @@ +import torch +import torch.nn as nn +import timm +import numpy as np +import itertools +#timm.list_models(pretrained=True) +#'hrnet_w64' +#tf_efficientnet_b3.ns_jft_in1k +#'tf_efficientnet_b2.ns_jft_in1k' +#'tf_efficientnet_b0.aa_in1k' CHECKED OK SLOW +#'tf_efficientnet_lite0.in1k' CHECKED OK BETTER +#'tf_efficientnetv2_b0.in1k' LIGHTWEIGHT +#'tf_efficientnet_lite4.in1k' CHECKED SLOW +#'tf_inception_v3' +#'tf_mixnet_m.in1k' CHECKED +#'tf_mobilenetv3_small_minimal_100.in1k' # CHECKED SLOW +#'efficientnetv2_rw_t.ra2_in1k' # CHECKED +#'efficientnet_lite0.ra_in1k' # CHECKED SLOWER +#'mobilenetv3_small_100.lamb_in1k' #CHECKED SLOW BAD +#'tf_efficientnet_b0.ap_in1k' # CHECKED SLOWER GOOD +#'tf_efficientnetv2_s.in1k' # CHECKED SLOWER GOOD +class TyNet(nn.Module): + def __init__(self, num_classes=80, backbone='tf_efficientnet_lite0.in1k', out_size=64, nc=80, compound_coef=0, **kwargs) -> None: + super(TyNet, self).__init__() + + #avail_pretrained_models= timm.list_models(pretrained=True) + self.compound_coef = compound_coef + self.anchor_scale = [4., 4., 4., 4., 4., 4., 4., 5., 4.] 
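Note on the models/utils.py additions above: postprocess calls batched_nms but the file never imports it, so from torchvision.ops.boxes import batched_nms would need to join the new cv2/typing imports. A minimal sketch of how BBoxTransform, ClipBoxes and postprocess are wired together at inference time follows; model, imgs and the two thresholds are placeholders, not part of the diff, and the unpacking assumes the (features, regression, classification, anchors) return order of TyNet.forward.

import torch
from models.utils import BBoxTransform, ClipBoxes, postprocess

regress_boxes, clip_boxes = BBoxTransform(), ClipBoxes()

with torch.no_grad():
    # model is assumed to be a TyNet-style detector; imgs is the framed NCHW batch
    _, regression, classification, anchors = model(imgs)
    preds = postprocess(imgs, anchors, regression, classification,
                        regress_boxes, clip_boxes,
                        threshold=0.2, iou_threshold=0.5)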
+ self.aspect_ratios = kwargs.get('ratios', [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]) + self.num_scales = len(kwargs.get('scales', [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)])) + num_anchors = len(self.aspect_ratios) * self.num_scales + self.num_classes = num_classes + self.pyramid_levels = [3] + self.backbone = timm.create_model(backbone, pretrained=True, features_only=True) + x=torch.randn(1,3,224,224) + out=self.backbone(x) + layers = [x.shape[1] for x in out] + + #avail_pretrained_models = timm.list_models(pretrained=True) + + self.neck = TyNeck(layers=layers, out_size=out_size) + #self.regressor = Regressor(in_channels=out_size, num_anchors=num_anchors, num_layers=3, pyramid_levels=3) + #self.classifier = Classifier(in_channels=out_size, num_anchors=num_anchors, num_classes=self.num_classes, num_layers=3) + + #self.anchors = Anchors(anchor_scale=self.anchor_scale[compound_coef], + # pyramid_levels=(torch.arange(self.pyramid_levels[self.compound_coef]) + 3).tolist(), + # **kwargs) + + self.head = TyHead(out_size=out_size, num_anchors=9, num_classes=num_classes, num_layers=3, anchor_scale=self.anchor_scale[compound_coef], + pyramid_levels=(torch.arange(self.pyramid_levels[self.compound_coef]) + 3).tolist(), **kwargs) + + def forward(self, inputs): + + x = self.backbone(inputs)[-3:] + x = self.neck(x) + x, regression, classification, anchors = self.head(x, inputs, inputs.dtype) + + return x, regression, classification, anchors + + +class TyNeck(nn.Module): + def __init__(self, layers=[512, 1024, 2048], out_size=64, procedure=[3, 4]) -> None: + super(TyNeck, self).__init__() + + self.out_layer1 = Conv(in_ch=layers[-1], out_ch=out_size * 2, k_size=3, s=1, p=1) + self.out_layer2 = Conv(in_ch=out_size * 2, out_ch=out_size, k_size=3, s=1, p=1) + self.out_layer = nn.Sequential(self.out_layer1, self.out_layer2) + module_dict = {} + for i in layers[:-1]: + module_dict[str(i)] = Conv(in_ch=i, out_ch=layers[-1], k_size=1, s=1, p=0) + + self.upconv_dict = nn.ModuleDict(module_dict) + + self.layer1 = nn.Sequential(*[ScalableCSPResBlock(layers[-1])] * procedure[0]) + self.layer2 = nn.Sequential(*[ScalableCSPResBlock(layers[-1])] * procedure[1]) + self.layers = nn.ModuleList([self.layer1, self.layer2]) + self.upsample = nn.Upsample(scale_factor=2.0, mode='bilinear') + + self.act = nn.SiLU() + + def upsample_add(self, x, y): + + y = self.act(self.upconv_dict[str(y.shape[1])](y)) + + return self.upsample(x) + y + + def forward(self, inputs): + + x = inputs.pop(-1) + + mids = [x] + for layer in self.layers: + x = self.upsample_add(x, inputs.pop(-1)) + x = layer(x) + mids.append(x) + + outs = [] + for mid in mids: + outs.append(self.out_layer(mid)) + + return outs + +class TyHead(nn.Module): + def __init__(self, out_size, anchor_scale, pyramid_levels, num_anchors=9, num_classes=80, num_layers=3, **kwargs): + super(TyHead, self).__init__() + + self.regressor = Regressor2(in_channels=out_size, num_anchors=num_anchors, num_layers=num_layers) + self.classifier = Classifier2(in_channels=out_size, num_anchors=9, num_classes=num_classes, num_layers=3) + + self.anchors = Anchors(anchor_scale=anchor_scale, + pyramid_levels=pyramid_levels, + **kwargs) + + def forward(self, x, inputs, dtype): + + regression = self.regressor(x) + classification = self.classifier(x) + anchors = self.anchors(inputs, dtype) + + return x, regression, classification, anchors + +class Regressor(nn.Module): + """ + modified by Zylo117 + """ + + def __init__(self, in_channels, num_anchors, num_layers, pyramid_levels=5, onnx_export=False): + 
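The dummy-forward channel probe in TyNet.__init__ above can be reproduced standalone; a small sketch (pretrained=False here only to avoid downloading weights, the backbone name matches the default argument):

import timm
import torch

# probe the per-level output channels of a timm features_only backbone
backbone = timm.create_model('tf_efficientnet_lite0.in1k', pretrained=False, features_only=True)
with torch.no_grad():
    feats = backbone(torch.randn(1, 3, 224, 224))
print([f.shape[1] for f in feats])           # channel count per feature level; TyNet feeds these to TyNeck
print(backbone.feature_info.channels())      # same information from timm metadata, no forward pass needed

Since timm exposes feature_info.channels(), the probe could be replaced by a metadata lookup and the extra 224x224 forward pass dropped; TyNet.forward then keeps only the last three levels via self.backbone(inputs)[-3:].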
super(Regressor, self).__init__() + self.num_layers = num_layers + + self.conv_list = nn.ModuleList( + [SeparableConvBlock(in_channels, in_channels, norm=False, activation=False) for i in range(num_layers)]) + self.bn_list = nn.ModuleList( + [nn.ModuleList([nn.BatchNorm2d(in_channels, momentum=0.1, eps=1e-5) for i in range(num_layers)]) for j in + range(pyramid_levels)]) + self.header = SeparableConvBlock(in_channels, num_anchors * 4, norm=False, activation=False) + self.swish = MemoryEfficientSwish() if not onnx_export else Swish() + + def forward(self, inputs): + feats = [] + for feat, bn_list in zip(inputs, self.bn_list): + for i, bn, conv in zip(range(self.num_layers), bn_list, self.conv_list): + feat = conv(feat) + feat = bn(feat) + feat = self.swish(feat) + feat = self.header(feat) + + feat = feat.permute(0, 2, 3, 1) + feat = feat.contiguous().view(feat.shape[0], -1, 4) + + feats.append(feat) + + feats = torch.cat(feats, dim=1) + + return feats + +import math +import torch.nn.functional as F +class Conv2dStaticSamePadding(nn.Module): + """ + created by Zylo117 + The real keras/tensorflow conv2d with same padding + """ + + def __init__(self, in_channels, out_channels, kernel_size, stride=1, bias=True, groups=1, dilation=1, **kwargs): + super().__init__() + self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride=stride, + bias=bias, groups=groups) + self.stride = self.conv.stride + self.kernel_size = self.conv.kernel_size + self.dilation = self.conv.dilation + + if isinstance(self.stride, int): + self.stride = [self.stride] * 2 + elif len(self.stride) == 1: + self.stride = [self.stride[0]] * 2 + + if isinstance(self.kernel_size, int): + self.kernel_size = [self.kernel_size] * 2 + elif len(self.kernel_size) == 1: + self.kernel_size = [self.kernel_size[0]] * 2 + + def forward(self, x): + h, w = x.shape[-2:] + + extra_h = (math.ceil(w / self.stride[1]) - 1) * self.stride[1] - w + self.kernel_size[1] + extra_v = (math.ceil(h / self.stride[0]) - 1) * self.stride[0] - h + self.kernel_size[0] + + left = extra_h // 2 + right = extra_h - left + top = extra_v // 2 + bottom = extra_v - top + + x = F.pad(x, [left, right, top, bottom]) + + x = self.conv(x) + return x +class SeparableConvBlock(nn.Module): + """ + created by Zylo117 + """ + + def __init__(self, in_channels, out_channels=None, norm=True, activation=False, onnx_export=False): + super(SeparableConvBlock, self).__init__() + if out_channels is None: + out_channels = in_channels + + # Q: whether separate conv + # share bias between depthwise_conv and pointwise_conv + # or just pointwise_conv apply bias. + # A: Confirmed, just pointwise_conv applies bias, depthwise_conv has no bias. 
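For the stride-1, kernel-3 configuration used by SeparableConvBlock, Conv2dStaticSamePadding pads one pixel on each side, which is exactly what nn.Conv2d's built-in padding='same' (PyTorch 1.9+) does; a compact, self-contained sketch of that equivalence (not part of the diff):

import torch
import torch.nn as nn

class SeparableConvSame(nn.Module):
    # depthwise 3x3 + pointwise 1x1; bias only on the pointwise conv, as noted above
    def __init__(self, in_ch, out_ch):
        super().__init__()
        self.depthwise = nn.Conv2d(in_ch, in_ch, 3, padding='same', groups=in_ch, bias=False)
        self.pointwise = nn.Conv2d(in_ch, out_ch, 1, bias=True)

    def forward(self, x):
        return self.pointwise(self.depthwise(x))

x = torch.randn(1, 64, 32, 32)
assert SeparableConvSame(64, 64)(x).shape == x.shape   # spatial size preserved

The explicit same-padding class is still needed once strides other than 1 are involved, since padding='same' only supports stride 1.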
+ + self.depthwise_conv = Conv2dStaticSamePadding(in_channels, in_channels, + kernel_size=3, stride=1, groups=in_channels, bias=False) + self.pointwise_conv = Conv2dStaticSamePadding(in_channels, out_channels, kernel_size=1, stride=1) + + self.norm = norm + if self.norm: + # Warning: pytorch momentum is different from tensorflow's, momentum_pytorch = 1 - momentum_tensorflow + self.bn = nn.BatchNorm2d(num_features=out_channels, momentum=0.1, eps=1e-3,track_running_stats=False) + + self.activation = activation + if self.activation: + self.swish = MemoryEfficientSwish() if not onnx_export else Swish() + + def forward(self, x): + x = self.depthwise_conv(x) + x = self.pointwise_conv(x) + + if self.norm: + x = self.bn(x) + + if self.activation: + x = self.swish(x) + + return x + +class Classifier(nn.Module): + """ + modified by Zylo117 + """ + + def __init__(self, in_channels, num_anchors, num_classes, num_layers, pyramid_levels=5, onnx_export=False): + super(Classifier, self).__init__() + self.num_anchors = num_anchors + self.num_classes = num_classes + self.num_layers = num_layers + self.conv_list = nn.ModuleList( + [SeparableConvBlock(in_channels, in_channels, norm=False, activation=False) for i in range(num_layers)]) + self.bn_list = nn.ModuleList( + [nn.ModuleList([nn.BatchNorm2d(in_channels, momentum=0.1, eps=1e-3,track_running_stats=False) for i in range(num_layers)]) for j in + range(pyramid_levels)]) + self.header = SeparableConvBlock(in_channels, num_anchors * num_classes, norm=False, activation=False) + self.swish = MemoryEfficientSwish() if not onnx_export else Swish() + + def forward(self, inputs): + feats = [] + for feat, bn_list in zip(inputs, self.bn_list): + for i, bn, conv in zip(range(self.num_layers), bn_list, self.conv_list): + feat = conv(feat) + feat = bn(feat) + feat = self.swish(feat) + feat = self.header(feat) + + feat = feat.permute(0, 2, 3, 1) + feat = feat.contiguous().view(feat.shape[0], feat.shape[1], feat.shape[2], self.num_anchors, + self.num_classes) + feat = feat.contiguous().view(feat.shape[0], -1, self.num_classes) + + feats.append(feat) + + feats = torch.cat(feats, dim=1) + feats = feats.sigmoid() + + return feats + +class Regressor2(nn.Module): + + def __init__(self, in_channels, num_anchors=9, num_layers=3, pyramid_levels=3) -> None: + super(Regressor2, self).__init__() + + self.conv_list = nn.Sequential( + *[ConvBlock(in_channels, in_channels, norm_2=True, norm_1=False, activation=True) for i in range(num_layers)]) + + self.head_conv = ConvBlock(in_channels, num_anchors * 4, norm_1=False, norm_2=False, activation=False) + + self.act = nn.SiLU() + + + def forward(self, x): + feats = [] + for feat in x: # APPLY CSPNET + + feat = self.conv_list(feat) + feat = self.head_conv(feat) + + feat = feat.permute(0, 2, 3, 1) + feat = feat.contiguous().view(feat.shape[0], -1, 4) + + feats.append(feat) + feats = torch.cat(feats, dim=1) + + return feats + + + +class Classifier2(nn.Module): + + def __init__(self, in_channels, num_anchors, num_classes, num_layers=3, pyramid_levels=3) -> None: + super(Classifier2, self).__init__() + + self.num_anchors = num_anchors + self.num_classes = num_classes + self.num_layers = num_layers + + self.conv_list = nn.Sequential( + *[ConvBlock(in_channels, in_channels, norm_2=True, norm_1=False, activation=True) for i in range(num_layers)]) + + self.header = ConvBlock(in_channels, num_anchors * num_classes, norm_1=False, norm_2=False, activation=False) + + self.act = nn.SiLU() + + + + def forward(self, x): + + + feats = [] + for feat in x: 
# APPLY CSPNET + + feat = self.conv_list(feat) + feat = self.header(feat) + + feat = feat.permute(0, 2, 3, 1) + feat = feat.contiguous().view(feat.shape[0], feat.shape[1], feat.shape[2], self.num_anchors, + self.num_classes) + feat = feat.contiguous().view(feat.shape[0], -1, self.num_classes) + + feats.append(feat) + feats = torch.cat(feats, dim=1) + feats = feats.sigmoid() + return feats + + +class ScalableCSPResBlock(nn.Module): + + def __init__(self, in_ch=512, num_basic_layers=1) -> None: + super(ScalableCSPResBlock, self).__init__() + + self.conv1 = nn.Conv2d(in_channels=in_ch, out_channels=in_ch * 2, kernel_size=3, stride=1, padding=1) + self.bn1 = nn.LayerNorm(in_ch * 2) + self.conv2 = nn.Conv2d(in_channels=in_ch * 2, out_channels=in_ch * 2, kernel_size=1, stride=1, padding=0) + self.bn2 = nn.LayerNorm(in_ch * 2) + basic_layers = [] + + for _ in range(num_basic_layers): + basic_layers.append(BasicBlock(in_ch, in_ch)) + + self.basic_layers = nn.Sequential(*basic_layers) + self.transition = nn.Conv2d(in_ch * 2, in_ch, kernel_size=1, stride=1, padding=0) + self.act = nn.SiLU() + + def forward(self, x): + x = self.conv1(x).permute(0, 2, 3, 1) + x = self.bn1(x).permute(0, 3, 1, 2) + x = self.act(x) + x = self.conv2(x).permute(0, 2, 3, 1) + x = self.bn2(x).permute(0, 3, 1, 2) + x = self.act(x) + + + xs, xb = x.split(x.shape[1] // 2, dim=1) + xb = self.basic_layers(xb) + + out = self.transition(torch.cat([xs, xb], dim=1)) + + return out + + +class BasicBlock(nn.Module): + def __init__(self, in_channels, out_channels, stride=1, downsample=False): + super(BasicBlock, self).__init__() + + self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=(3, 3), stride=stride, padding=(1, 1), bias=False) + self.bn1 = nn.LayerNorm(out_channels) + self.act = nn.SiLU() + self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), + bias=False) + self.bn2 = nn.LayerNorm(out_channels) + + if downsample: + self.downsample = nn.Sequential( + nn.Conv2d(in_channels, out_channels, kernel_size=(1, 1), stride=(2, 2), bias=False), + nn.LayerNorm(out_channels) + ) + else: + self.downsample = None + self.out_channels = out_channels + + def forward(self, x): + + out = self.conv1(x).permute(0, 2, 3, 1) + out = self.bn1(out).permute(0, 3, 1, 2) + out = self.act(out) + out = self.conv2(out).permute(0, 2, 3, 1) + out = self.bn2(out).permute(0, 3, 1, 2) + + if self.downsample is not None: + x = self.downsample(x) + + out = x + out + out = self.act(out) + + return out + + +class Conv(nn.Module): + def __init__(self, in_ch, out_ch, k_size=3, s=1, p=0, upsample=False, act=nn.Mish(), norm=True, bias=True) -> None: + super(Conv, self).__init__() + + self.norm = norm + self.conv = nn.Conv2d(in_channels=in_ch, out_channels=out_ch, kernel_size=k_size, stride=s, padding=p, bias=bias) + + if norm: + self.bn = nn.LayerNorm(out_ch) + + + def forward(self, x): + + if self.norm: + x = self.conv(x).permute(0, 2, 3, 1) + x = self.bn(x).permute(0, 3, 1, 2) + return x + + else: + + return self.conv(x) + + +class ConvBlock(nn.Module): + + def __init__(self, in_channel, out_channel, norm_1, norm_2, activation) -> None: + super().__init__() + + self.activation = activation + self.conv1 = Conv(in_ch=in_channel, out_ch=in_channel, k_size=3, s=1, p=1, norm=norm_1, bias=False) + + self.conv2 = Conv(in_ch=in_channel, out_ch=out_channel, k_size=1, s=1, norm=norm_2) + + if self.activation: + self.act = nn.SiLU() + #self.bn = nn.LayerNorm(out_channel) + + def forward(self, x): + + x = self.conv1(x) + x = 
self.conv2(x) + + if self.activation: + return self.act(x) + + return x + + +class Anchors(nn.Module): + """ + adapted and modified from https://github.com/google/automl/blob/master/efficientdet/anchors.py by Zylo117 + """ + + def __init__(self, anchor_scale=4., pyramid_levels=None, **kwargs): + super().__init__() + self.anchor_scale = anchor_scale + + if pyramid_levels is None: + self.pyramid_levels = [3, 4, 5, 6, 7] + else: + self.pyramid_levels = pyramid_levels + + self.strides = kwargs.get('strides', [2 ** x for x in self.pyramid_levels]) + self.scales = np.array(kwargs.get('scales', [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)])) + self.ratios = kwargs.get('ratios', [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]) + + self.last_anchors = {} + self.last_shape = None + + def forward(self, image, dtype=torch.float32): + """Generates multiscale anchor boxes. + + Args: + image_size: integer number of input image size. The input image has the + same dimension for width and height. The image_size should be divided by + the largest feature stride 2^max_level. + anchor_scale: float number representing the scale of size of the base + anchor to the feature stride 2^level. + anchor_configs: a dictionary with keys as the levels of anchors and + values as a list of anchor configuration. + + Returns: + anchor_boxes: a numpy array with shape [N, 4], which stacks anchors on all + feature levels. + Raises: + ValueError: input size must be the multiple of largest feature stride. + """ + image_shape = image.shape[2:] + + if image_shape == self.last_shape and image.device in self.last_anchors: + return self.last_anchors[image.device] + + if self.last_shape is None or self.last_shape != image_shape: + self.last_shape = image_shape + + if dtype == torch.float16: + dtype = np.float16 + else: + dtype = np.float32 + + boxes_all = [] + for stride in self.strides: + boxes_level = [] + for scale, ratio in itertools.product(self.scales, self.ratios): + if image_shape[1] % stride != 0: + raise ValueError('input size must be divided by the stride.') + base_anchor_size = self.anchor_scale * stride * scale + anchor_size_x_2 = base_anchor_size * ratio[0] / 2.0 + anchor_size_y_2 = base_anchor_size * ratio[1] / 2.0 + + x = np.arange(stride / 2, image_shape[1], stride) + y = np.arange(stride / 2, image_shape[0], stride) + xv, yv = np.meshgrid(x, y) + xv = xv.reshape(-1) + yv = yv.reshape(-1) + + # y1,x1,y2,x2 + boxes = np.vstack((yv - anchor_size_y_2, xv - anchor_size_x_2, + yv + anchor_size_y_2, xv + anchor_size_x_2)) + boxes = np.swapaxes(boxes, 0, 1) + boxes_level.append(np.expand_dims(boxes, axis=1)) + # concat anchors on the same level to the reshape NxAx4 + boxes_level = np.concatenate(boxes_level, axis=1) + boxes_all.append(boxes_level.reshape([-1, 4])) + + anchor_boxes = np.vstack(boxes_all) + + anchor_boxes = torch.from_numpy(anchor_boxes.astype(dtype)).to(image.device) + anchor_boxes = anchor_boxes.unsqueeze(0) + + # save it for later use to reduce overhead + self.last_anchors[image.device] = anchor_boxes + return anchor_boxes \ No newline at end of file diff --git a/training.yaml b/training.yaml old mode 100644 new mode 100755 diff --git a/utils/utils.py b/utils/utils.py new file mode 100755 index 0000000..539fdf9 --- /dev/null +++ b/utils/utils.py @@ -0,0 +1,314 @@ +# Author: Zylo117 + +import math +import os +import uuid +from glob import glob +from typing import Union + +import cv2 +import numpy as np +import torch +import webcolors +from torch import nn +from torch.nn.init import _calculate_fan_in_and_fan_out, 
_no_grad_normal_ +from torchvision.ops.boxes import batched_nms + +from utils.sync_batchnorm import SynchronizedBatchNorm2d + + +def invert_affine(metas: Union[float, list, tuple], preds): + for i in range(len(preds)): + if len(preds[i]['rois']) == 0: + continue + else: + if metas is float: + preds[i]['rois'][:, [0, 2]] = preds[i]['rois'][:, [0, 2]] / metas + preds[i]['rois'][:, [1, 3]] = preds[i]['rois'][:, [1, 3]] / metas + else: + new_w, new_h, old_w, old_h, padding_w, padding_h = metas[i] + preds[i]['rois'][:, [0, 2]] = preds[i]['rois'][:, [0, 2]] / (new_w / old_w) + preds[i]['rois'][:, [1, 3]] = preds[i]['rois'][:, [1, 3]] / (new_h / old_h) + return preds + + +def aspectaware_resize_padding(image, width, height, interpolation=None, means=None): + old_h, old_w, c = image.shape + if old_w > old_h: + new_w = width + new_h = int(width / old_w * old_h) + else: + new_w = int(height / old_h * old_w) + new_h = height + + canvas = np.zeros((height, height, c), np.float32) + if means is not None: + canvas[...] = means + + if new_w != old_w or new_h != old_h: + if interpolation is None: + image = cv2.resize(image, (new_w, new_h)) + else: + image = cv2.resize(image, (new_w, new_h), interpolation=interpolation) + + padding_h = height - new_h + padding_w = width - new_w + + if c > 1: + canvas[:new_h, :new_w] = image + else: + if len(image.shape) == 2: + canvas[:new_h, :new_w, 0] = image + else: + canvas[:new_h, :new_w] = image + + return canvas, new_w, new_h, old_w, old_h, padding_w, padding_h, + + +def preprocess(*image_path, max_size=512, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)): + ori_imgs = [cv2.imread(img_path) for img_path in image_path] + normalized_imgs = [(img[..., ::-1] / 255 - mean) / std for img in ori_imgs] + imgs_meta = [aspectaware_resize_padding(img, max_size, max_size, + means=None) for img in normalized_imgs] + framed_imgs = [img_meta[0] for img_meta in imgs_meta] + framed_metas = [img_meta[1:] for img_meta in imgs_meta] + + return ori_imgs, framed_imgs, framed_metas + + +def preprocess_video(*frame_from_video, max_size=512, mean=(0.406, 0.456, 0.485), std=(0.225, 0.224, 0.229)): + ori_imgs = frame_from_video + normalized_imgs = [(img[..., ::-1] / 255 - mean) / std for img in ori_imgs] + imgs_meta = [aspectaware_resize_padding(img, max_size, max_size, + means=None) for img in normalized_imgs] + framed_imgs = [img_meta[0] for img_meta in imgs_meta] + framed_metas = [img_meta[1:] for img_meta in imgs_meta] + + return ori_imgs, framed_imgs, framed_metas + + +def postprocess(x, anchors, regression, classification, regressBoxes, clipBoxes, threshold, iou_threshold): + transformed_anchors = regressBoxes(anchors, regression) + transformed_anchors = clipBoxes(transformed_anchors, x) + scores = torch.max(classification, dim=2, keepdim=True)[0] + scores_over_thresh = (scores > threshold)[:, :, 0] + out = [] + for i in range(x.shape[0]): + if scores_over_thresh[i].sum() == 0: + out.append({ + 'rois': np.array(()), + 'class_ids': np.array(()), + 'scores': np.array(()), + }) + continue + + classification_per = classification[i, scores_over_thresh[i, :], ...].permute(1, 0) + transformed_anchors_per = transformed_anchors[i, scores_over_thresh[i, :], ...] + scores_per = scores[i, scores_over_thresh[i, :], ...] 
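Two things stand out in the helpers above. In invert_affine, the test metas is float compares the argument against the float type object and is therefore always False for an actual float, so the single-scale branch can never run; isinstance(metas, float) is presumably what was intended (the same pattern appears in the models/utils.py copy). In aspectaware_resize_padding, the canvas is allocated as np.zeros((height, height, c), ...), where the second dimension presumably should be width; this is harmless for the square max_size x max_size calls in preprocess but wrong for non-square targets. A corrected sketch of invert_affine under those assumptions:

def invert_affine(metas, preds):
    # scale ROIs from the resized/padded frame back to original image coordinates
    for i in range(len(preds)):
        if len(preds[i]['rois']) == 0:
            continue
        if isinstance(metas, float):          # one global scale factor
            preds[i]['rois'][:, [0, 2]] /= metas
            preds[i]['rois'][:, [1, 3]] /= metas
        else:                                 # per-image metadata from aspectaware_resize_padding
            new_w, new_h, old_w, old_h, padding_w, padding_h = metas[i]
            preds[i]['rois'][:, [0, 2]] /= (new_w / old_w)
            preds[i]['rois'][:, [1, 3]] /= (new_h / old_h)
    return preds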
+ scores_, classes_ = classification_per.max(dim=0) + anchors_nms_idx = batched_nms(transformed_anchors_per, scores_per[:, 0], classes_, iou_threshold=iou_threshold) + + if anchors_nms_idx.shape[0] != 0: + classes_ = classes_[anchors_nms_idx] + scores_ = scores_[anchors_nms_idx] + boxes_ = transformed_anchors_per[anchors_nms_idx, :] + + out.append({ + 'rois': boxes_.cpu().numpy(), + 'class_ids': classes_.cpu().numpy(), + 'scores': scores_.cpu().numpy(), + }) + else: + out.append({ + 'rois': np.array(()), + 'class_ids': np.array(()), + 'scores': np.array(()), + }) + + return out + + +def display(preds, imgs, obj_list, imshow=True, imwrite=False): + for i in range(len(imgs)): + if len(preds[i]['rois']) == 0: + continue + + imgs[i] = imgs[i].copy() + + for j in range(len(preds[i]['rois'])): + (x1, y1, x2, y2) = preds[i]['rois'][j].astype(np.int) + obj = obj_list[preds[i]['class_ids'][j]] + score = float(preds[i]['scores'][j]) + + plot_one_box(imgs[i], [x1, y1, x2, y2], label=obj, score=score, + color=color_list[get_index_label(obj, obj_list)]) + if imshow: + cv2.imshow('img', imgs[i]) + cv2.waitKey(0) + + if imwrite: + os.makedirs('test/', exist_ok=True) + cv2.imwrite(f'test/{uuid.uuid4().hex}.jpg', imgs[i]) + + +def replace_w_sync_bn(m): + for var_name in dir(m): + target_attr = getattr(m, var_name) + if type(target_attr) == torch.nn.BatchNorm2d: + num_features = target_attr.num_features + eps = target_attr.eps + momentum = target_attr.momentum + affine = target_attr.affine + + # get parameters + running_mean = target_attr.running_mean + running_var = target_attr.running_var + if affine: + weight = target_attr.weight + bias = target_attr.bias + + setattr(m, var_name, + SynchronizedBatchNorm2d(num_features, eps, momentum, affine)) + + target_attr = getattr(m, var_name) + # set parameters + target_attr.running_mean = running_mean + target_attr.running_var = running_var + if affine: + target_attr.weight = weight + target_attr.bias = bias + + for var_name, children in m.named_children(): + replace_w_sync_bn(children) + + +class CustomDataParallel(nn.DataParallel): + """ + force splitting data to all gpus instead of sending all data to cuda:0 and then moving around. + """ + + def __init__(self, module, num_gpus): + super().__init__(module) + self.num_gpus = num_gpus + + def scatter(self, inputs, kwargs, device_ids): + # More like scatter and data prep at the same time. The point is we prep the data in such a way + # that no scatter is necessary, and there's no need to shuffle stuff around different GPUs. 
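To make the scatter contract above concrete: inputs[0] and inputs[1] are sliced into batch_size // num_gpus chunks and each chunk pair is moved to its own device, so the wrapped module must accept (imgs, annots) and the batch must be at least num_gpus. A hedged usage sketch with a stand-in module (DummyDet is an assumption, not part of the repo):

import torch
from torch import nn
from utils.utils import CustomDataParallel

class DummyDet(nn.Module):
    # stand-in for the real detector; takes (imgs, annots) like the scatter above expects
    def forward(self, imgs, annots):
        return imgs.mean() + annots.mean()

num_gpus = 2                                   # requires at least two visible GPUs
model = CustomDataParallel(DummyDet(), num_gpus).cuda()
imgs = torch.randn(8, 3, 512, 512)             # 8 images -> 4 per GPU
annots = torch.randn(8, 20, 5)                 # annotation slices follow their images
out = model(imgs, annots)

Separately, display() above converts boxes with .astype(np.int); the np.int alias was removed in NumPy 1.24, so .astype(int) or np.int32 is the future-proof spelling.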
+ devices = ['cuda:' + str(x) for x in range(self.num_gpus)] + splits = inputs[0].shape[0] // self.num_gpus + + if splits == 0: + raise Exception('Batchsize must be greater than num_gpus.') + + return [(inputs[0][splits * device_idx: splits * (device_idx + 1)].to(f'cuda:{device_idx}', non_blocking=True), + inputs[1][splits * device_idx: splits * (device_idx + 1)].to(f'cuda:{device_idx}', non_blocking=True)) + for device_idx in range(len(devices))], \ + [kwargs] * len(devices) + + +def get_last_weights(weights_path): + weights_path = glob(weights_path + f'/*.pth') + weights_path = sorted(weights_path, + key=lambda x: int(x.rsplit('_')[-1].rsplit('.')[0]), + reverse=True)[0] + print(f'using weights {weights_path}') + return weights_path + + +def init_weights(model): + for name, module in model.named_modules(): + is_conv_layer = isinstance(module, nn.Conv2d) + + if is_conv_layer: + if "conv_list" in name or "header" in name: + variance_scaling_(module.weight.data) + else: + nn.init.kaiming_uniform_(module.weight.data) + + if module.bias is not None: + if "classifier.header" in name: + bias_value = -np.log((1 - 0.01) / 0.01) + torch.nn.init.constant_(module.bias, bias_value) + else: + module.bias.data.zero_() + + +def variance_scaling_(tensor, gain=1.): + # type: (Tensor, float) -> Tensor + r""" + initializer for SeparableConv in Regressor/Classifier + reference: https://keras.io/zh/initializers/ VarianceScaling + """ + fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor) + std = math.sqrt(gain / float(fan_in)) + + return _no_grad_normal_(tensor, 0., std) + + +STANDARD_COLORS = [ + 'LawnGreen', 'Chartreuse', 'Aqua', 'Beige', 'Azure', 'BlanchedAlmond', 'Bisque', + 'Aquamarine', 'BlueViolet', 'BurlyWood', 'CadetBlue', 'AntiqueWhite', + 'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk', 'Crimson', 'Cyan', + 'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 'DarkKhaki', 'DarkOrange', + 'DarkOrchid', 'DarkSalmon', 'DarkSeaGreen', 'DarkTurquoise', 'DarkViolet', + 'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick', 'FloralWhite', + 'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite', 'Gold', 'GoldenRod', + 'Salmon', 'Tan', 'HoneyDew', 'HotPink', 'IndianRed', 'Ivory', 'Khaki', + 'Lavender', 'LavenderBlush', 'AliceBlue', 'LemonChiffon', 'LightBlue', + 'LightCoral', 'LightCyan', 'LightGoldenRodYellow', 'LightGray', 'LightGrey', + 'LightGreen', 'LightPink', 'LightSalmon', 'LightSeaGreen', 'LightSkyBlue', + 'LightSlateGray', 'LightSlateGrey', 'LightSteelBlue', 'LightYellow', 'Lime', + 'LimeGreen', 'Linen', 'Magenta', 'MediumAquaMarine', 'MediumOrchid', + 'MediumPurple', 'MediumSeaGreen', 'MediumSlateBlue', 'MediumSpringGreen', + 'MediumTurquoise', 'MediumVioletRed', 'MintCream', 'MistyRose', 'Moccasin', + 'NavajoWhite', 'OldLace', 'Olive', 'OliveDrab', 'Orange', 'OrangeRed', + 'Orchid', 'PaleGoldenRod', 'PaleGreen', 'PaleTurquoise', 'PaleVioletRed', + 'PapayaWhip', 'PeachPuff', 'Peru', 'Pink', 'Plum', 'PowderBlue', 'Purple', + 'Red', 'RosyBrown', 'RoyalBlue', 'SaddleBrown', 'Green', 'SandyBrown', + 'SeaGreen', 'SeaShell', 'Sienna', 'Silver', 'SkyBlue', 'SlateBlue', + 'SlateGray', 'SlateGrey', 'Snow', 'SpringGreen', 'SteelBlue', 'GreenYellow', + 'Teal', 'Thistle', 'Tomato', 'Turquoise', 'Violet', 'Wheat', 'White', + 'WhiteSmoke', 'Yellow', 'YellowGreen' +] + + +def from_colorname_to_bgr(color): + rgb_color = webcolors.name_to_rgb(color) + result = (rgb_color.blue, rgb_color.green, rgb_color.red) + return result + + +def standard_to_bgr(list_color_name): + standard = [] + for i in range(len(list_color_name) - 
36): # -36 used to match the len(obj_list) + standard.append(from_colorname_to_bgr(list_color_name[i])) + return standard + + +def get_index_label(label, obj_list): + index = int(obj_list.index(label)) + return index + + +def plot_one_box(img, coord, label=None, score=None, color=None, line_thickness=None): + tl = line_thickness or int(round(0.001 * max(img.shape[0:2]))) # line thickness + color = color + c1, c2 = (int(coord[0]), int(coord[1])), (int(coord[2]), int(coord[3])) + cv2.rectangle(img, c1, c2, color, thickness=tl) + if label: + tf = max(tl - 2, 1) # font thickness + s_size = cv2.getTextSize(str('{:.0%}'.format(score)), 0, fontScale=float(tl) / 3, thickness=tf)[0] + t_size = cv2.getTextSize(label, 0, fontScale=float(tl) / 3, thickness=tf)[0] + c2 = c1[0] + t_size[0] + s_size[0] + 15, c1[1] - t_size[1] - 3 + cv2.rectangle(img, c1, c2, color, -1) # filled + cv2.putText(img, '{}: {:.0%}'.format(label, score), (c1[0], c1[1] - 2), 0, float(tl) / 3, [0, 0, 0], + thickness=tf, lineType=cv2.FONT_HERSHEY_SIMPLEX) + + +color_list = standard_to_bgr(STANDARD_COLORS) + + +def boolean_string(s): + if s not in {'False', 'True'}: + raise ValueError('Not a valid boolean string') + return s == 'True'
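Two small notes on the tail of the file: in plot_one_box, cv2.putText receives cv2.FONT_HERSHEY_SIMPLEX (value 0) as its lineType argument while the fontFace is already the literal 0, so an actual line type such as cv2.LINE_AA is presumably what was meant, and the color = color assignment is a no-op. A self-contained sketch of the intended drawing behaviour (label text, coordinates and colors are made up for the demo):

import cv2
import numpy as np

img = np.zeros((256, 256, 3), dtype=np.uint8)
c1, c2 = (30, 40), (200, 180)
tl = max(int(round(0.001 * max(img.shape[:2]))), 1)   # guard: the 0.001 * max(h, w) heuristic rounds to 0 below 500 px
tf = max(tl - 2, 1)
cv2.rectangle(img, c1, c2, (0, 255, 0), thickness=tl)
cv2.putText(img, 'person: 87%', (c1[0], c1[1] - 2), cv2.FONT_HERSHEY_SIMPLEX,
            float(tl) / 3, (255, 255, 255), thickness=tf, lineType=cv2.LINE_AA)
cv2.imwrite('demo_box.jpg', img)

boolean_string, finally, looks like an argparse type= converter (parser.add_argument('--flag', type=boolean_string, default=False)); that usage is an assumption, since the callers are not part of this diff.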