Spaces:
Runtime error
Runtime error
| """ VoVNet (V1 & V2) | |
| Papers: | |
| * `An Energy and GPU-Computation Efficient Backbone Network` - https://arxiv.org/abs/1904.09730 | |
| * `CenterMask : Real-Time Anchor-Free Instance Segmentation` - https://arxiv.org/abs/1911.06667 | |
| Looked at https://github.com/youngwanLEE/vovnet-detectron2 & | |
| https://github.com/stigma0617/VoVNet.pytorch/blob/master/models_vovnet/vovnet.py | |
| for some reference, rewrote most of the code. | |
| Hacked together by / Copyright 2020 Ross Wightman | |
| """ | |
| from typing import List | |
| import torch | |
| import torch.nn as nn | |
| import torch.nn.functional as F | |
| from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD | |
| from .registry import register_model | |
| from .helpers import build_model_with_cfg | |
| from .layers import ConvBnAct, SeparableConvBnAct, BatchNormAct2d, ClassifierHead, DropPath,\ | |
| create_attn, create_norm_act, get_norm_act_layer | |
# model cfgs adapted from https://github.com/youngwanLEE/vovnet-detectron2 &
# https://github.com/stigma0617/VoVNet.pytorch/blob/master/models_vovnet/vovnet.py


def _arch_cfg(block_per_stage, layer_per_block, residual, attn, depthwise=False, slim=False, stem_out=128):
    """Assemble one architecture config dict.

    Fresh list objects are created on every call so no two configs share a list.
    `slim` selects the narrower stage widths, `stem_out` is the width of the last stem conv.
    """
    if slim:
        conv_chs, out_chs = [64, 80, 96, 112], [112, 256, 384, 512]
    else:
        conv_chs, out_chs = [128, 160, 192, 224], [256, 512, 768, 1024]
    return {
        'stem_chs': [64, 64, stem_out],
        'stage_conv_chs': conv_chs,
        'stage_out_chs': out_chs,
        'layer_per_block': layer_per_block,
        'block_per_stage': list(block_per_stage),
        'residual': residual,
        'depthwise': depthwise,
        'attn': attn,
    }


model_cfgs = {
    # V1 style: no residual connection, no attention
    'vovnet39a': _arch_cfg((1, 1, 2, 2), 5, residual=False, attn=''),
    'vovnet57a': _arch_cfg((1, 1, 4, 3), 5, residual=False, attn=''),
    # depthwise-separable variants use a 64 channel final stem conv
    'ese_vovnet19b_slim_dw': _arch_cfg(
        (1, 1, 1, 1), 3, residual=True, attn='ese', depthwise=True, slim=True, stem_out=64),
    'ese_vovnet19b_dw': _arch_cfg(
        (1, 1, 1, 1), 3, residual=True, attn='ese', depthwise=True, stem_out=64),
    'ese_vovnet19b_slim': _arch_cfg((1, 1, 1, 1), 3, residual=True, attn='ese', slim=True),
    'ese_vovnet19b': _arch_cfg((1, 1, 1, 1), 3, residual=True, attn='ese'),
    'ese_vovnet39b': _arch_cfg((1, 1, 2, 2), 5, residual=True, attn='ese'),
    'ese_vovnet57b': _arch_cfg((1, 1, 4, 3), 5, residual=True, attn='ese'),
    'ese_vovnet99b': _arch_cfg((1, 3, 9, 3), 5, residual=True, attn='ese'),
    'eca_vovnet39b': _arch_cfg((1, 1, 2, 2), 5, residual=True, attn='eca'),
}

# The experimental variants reuse (alias, not copy) the config of their base architecture.
model_cfgs['ese_vovnet39b_evos'] = model_cfgs['ese_vovnet39b']
model_cfgs['ese_vovnet99b_iabn'] = model_cfgs['ese_vovnet99b']
def _cfg(url=''):
    """Build the default (pretrained / data) config dict for a model variant.

    Args:
        url: pretrained weight location; an empty string when no URL is given.

    Returns:
        A new dict holding the weight url, classifier size, input and pooling sizes, crop and
        interpolation settings, normalisation statistics, and the names of the first conv and
        the classifier.
    """
    cfg = dict(
        url=url,
        num_classes=1000,
        input_size=(3, 224, 224),
        pool_size=(7, 7),
        crop_pct=0.875,
        interpolation='bicubic',
        mean=IMAGENET_DEFAULT_MEAN,
        std=IMAGENET_DEFAULT_STD,
        first_conv='stem.0.conv',
        classifier='head.fc',
    )
    return cfg
# Default config per registered variant; only two variants carry a pretrained weight URL.
default_cfgs = {
    variant: _cfg(url=weights_url)
    for variant, weights_url in (
        ('vovnet39a', ''),
        ('vovnet57a', ''),
        ('ese_vovnet19b_slim_dw', ''),
        ('ese_vovnet19b_dw',
         'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/ese_vovnet19b_dw-a8741004.pth'),
        ('ese_vovnet19b_slim', ''),
        ('ese_vovnet39b',
         'https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/ese_vovnet39b-f912fe73.pth'),
        ('ese_vovnet57b', ''),
        ('ese_vovnet99b', ''),
        ('eca_vovnet39b', ''),
        ('ese_vovnet39b_evos', ''),
        ('ese_vovnet99b_iabn', ''),
    )
}
class SequentialAppendList(nn.Sequential):
    """Sequential container that collects each layer's output and concatenates all of them.

    The first layer consumes `x`; every later layer consumes the most recent entry of
    `concat_list`. Each output is appended to `concat_list` (the caller's list is mutated).
    """

    def __init__(self, *args):
        super().__init__(*args)

    def forward(self, x: torch.Tensor, concat_list: List[torch.Tensor]) -> torch.Tensor:
        """Run the layers in order and return `concat_list` concatenated along dim 1.

        Args:
            x: input to the first layer.
            concat_list: list the layer outputs are appended to; entries already present
                are kept and take part in the final concatenation.
        """
        for idx, layer in enumerate(self):
            layer_in = x if idx == 0 else concat_list[-1]
            concat_list.append(layer(layer_in))
        return torch.cat(concat_list, dim=1)
class OsaBlock(nn.Module):
    """Block of chained 3x3 convs whose outputs are aggregated once at the end.

    The block input and the output of every mid conv are concatenated on the channel dim and
    fused by `conv_concat`, optionally followed by attention, drop-path and a residual add.
    """

    def __init__(self, in_chs, mid_chs, out_chs, layer_per_block, residual=False,
                 depthwise=False, attn='', norm_layer=BatchNormAct2d, act_layer=nn.ReLU, drop_path=None):
        """
        Args:
            in_chs: channels of the block input.
            mid_chs: channels produced by each mid conv.
            out_chs: channels produced by the aggregation conv.
            layer_per_block: number of mid convs.
            residual: add the block input to the block output.
            depthwise: use `SeparableConvBnAct` for the mid convs.
            attn: attention type name passed to `create_attn`; empty disables attention.
            norm_layer: norm layer forwarded to every conv block.
            act_layer: activation layer forwarded to every conv block.
            drop_path: optional module applied to the output before the residual add.
        """
        super().__init__()
        self.residual = residual
        self.depthwise = depthwise
        layer_kwargs = dict(norm_layer=norm_layer, act_layer=act_layer)

        # Separable mid convs are built mid_chs -> mid_chs, so a 1x1 conv first maps a
        # mismatching input width onto mid_chs.
        needs_reduction = depthwise and in_chs != mid_chs
        if needs_reduction:
            assert not residual
        self.conv_reduction = ConvBnAct(in_chs, mid_chs, 1, **layer_kwargs) if needs_reduction else None

        if depthwise:
            mid_layers = [
                SeparableConvBnAct(mid_chs, mid_chs, **layer_kwargs) for _ in range(layer_per_block)]
        else:
            # only the first 3x3 conv sees the block input width
            mid_layers = [
                ConvBnAct(in_chs if idx == 0 else mid_chs, mid_chs, 3, **layer_kwargs)
                for idx in range(layer_per_block)]
        self.conv_mid = SequentialAppendList(*mid_layers)

        # feature aggregation: block input + every mid conv output
        concat_chs = in_chs + layer_per_block * mid_chs
        self.conv_concat = ConvBnAct(concat_chs, out_chs, **layer_kwargs)

        self.attn = create_attn(attn, out_chs) if attn else None
        self.drop_path = drop_path

    def forward(self, x):
        shortcut = x
        features = [x]  # the un-reduced input is always part of the aggregation
        if self.conv_reduction is not None:
            x = self.conv_reduction(x)
        x = self.conv_concat(self.conv_mid(x, features))
        if self.attn is not None:
            x = self.attn(x)
        if self.drop_path is not None:
            x = self.drop_path(x)
        if self.residual:
            x = x + shortcut
        return x
class OsaStage(nn.Module):
    """A stage: optional stride-2 max pool followed by `block_per_stage` OSA blocks."""

    def __init__(self, in_chs, mid_chs, out_chs, block_per_stage, layer_per_block, downsample=True,
                 residual=True, depthwise=False, attn='ese', norm_layer=BatchNormAct2d, act_layer=nn.ReLU,
                 drop_path_rates=None):
        """
        Args:
            in_chs: channels entering the stage.
            mid_chs: mid conv width of every block.
            out_chs: output width of every block.
            block_per_stage: number of blocks in the stage.
            layer_per_block: number of mid convs per block.
            downsample: prepend a 3x3 stride-2 max pool (ceil mode).
            residual: enable the residual add on every block except the first.
            depthwise: use separable mid convs.
            attn: attention type, applied to the last block only.
            norm_layer: norm layer forwarded to the blocks.
            act_layer: activation layer forwarded to the blocks.
            drop_path_rates: optional per-block drop-path rates, indexed by block position.
        """
        super().__init__()
        self.pool = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True) if downsample else None

        stage_blocks = []
        final_idx = block_per_stage - 1
        for idx in range(block_per_stage):
            use_drop_path = drop_path_rates is not None and drop_path_rates[idx] > 0.
            drop_path = DropPath(drop_path_rates[idx]) if use_drop_path else None
            stage_blocks.append(OsaBlock(
                in_chs, mid_chs, out_chs, layer_per_block,
                residual=residual and idx > 0,  # first block changes width, so no skip
                depthwise=depthwise,
                attn=attn if idx == final_idx else '',
                norm_layer=norm_layer,
                act_layer=act_layer,
                drop_path=drop_path,
            ))
            in_chs = out_chs  # later blocks consume the previous block's output
        self.blocks = nn.Sequential(*stage_blocks)

    def forward(self, x):
        if self.pool is not None:
            x = self.pool(x)
        return self.blocks(x)
class VovNet(nn.Module):
    """VoVNet classification network: a three conv stem, four OSA stages and a classifier head.

    `feature_info` holds one entry for the stride-2 stem feature and one per stage, each with
    the channel count, the cumulative stride (`reduction`) and the module name.
    """

    def __init__(self, cfg, in_chans=3, num_classes=1000, global_pool='avg', drop_rate=0., stem_stride=4,
                 output_stride=32, norm_layer=BatchNormAct2d, act_layer=nn.ReLU, drop_path_rate=0.):
        """ VovNet (v2)

        Args:
            cfg: architecture dict with keys `stem_chs`, `stage_conv_chs`, `stage_out_chs`,
                `block_per_stage`, `layer_per_block`, `residual`, `depthwise` and `attn`.
            in_chans: number of input channels.
            num_classes: classifier size passed to `ClassifierHead`.
            global_pool: pooling type passed to `ClassifierHead`.
            drop_rate: drop rate passed to `ClassifierHead`.
            stem_stride: total stride of the stem, either 4 or 2.
            output_stride: only 32 is supported.
            norm_layer: norm layer forwarded to every conv block.
            act_layer: activation layer forwarded to every conv block.
            drop_path_rate: final drop-path rate; per-block rates are spaced linearly from 0
                to this value over all blocks.
        """
        super(VovNet, self).__init__()
        self.num_classes = num_classes
        self.drop_rate = drop_rate
        assert stem_stride in (4, 2)
        assert output_stride == 32  # FIXME support dilation

        stem_chs = cfg["stem_chs"]
        stage_conv_chs = cfg["stage_conv_chs"]
        stage_out_chs = cfg["stage_out_chs"]
        block_per_stage = cfg["block_per_stage"]
        layer_per_block = cfg["layer_per_block"]
        conv_kwargs = dict(norm_layer=norm_layer, act_layer=act_layer)

        # Stem module
        last_stem_stride = stem_stride // 2
        conv_type = SeparableConvBnAct if cfg["depthwise"] else ConvBnAct
        self.stem = nn.Sequential(*[
            ConvBnAct(in_chans, stem_chs[0], 3, stride=2, **conv_kwargs),
            conv_type(stem_chs[0], stem_chs[1], 3, stride=1, **conv_kwargs),
            conv_type(stem_chs[1], stem_chs[2], 3, stride=last_stem_stride, **conv_kwargs),
        ])
        # The last stem module still at stride 2 is stem.1 when the final stem conv strides
        # (stem_stride == 4) and stem.2 otherwise. The reported channel count must be that of
        # the same module, so both are derived from one index.
        stem_feat_idx = 1 if stem_stride == 4 else 2
        self.feature_info = [dict(
            num_chs=stem_chs[stem_feat_idx], reduction=2, module=f'stem.{stem_feat_idx}')]
        current_stride = stem_stride

        # OSA stages
        stage_dpr = torch.split(torch.linspace(0, drop_path_rate, sum(block_per_stage)), block_per_stage)
        in_ch_list = stem_chs[-1:] + stage_out_chs[:-1]
        stage_args = dict(residual=cfg["residual"], depthwise=cfg["depthwise"], attn=cfg["attn"], **conv_kwargs)
        stages = []
        for i in range(4):  # num_stages
            downsample = stem_stride == 2 or i > 0  # first stage has no stride/downsample if stem_stride is 4
            stages += [OsaStage(
                in_ch_list[i], stage_conv_chs[i], stage_out_chs[i], block_per_stage[i], layer_per_block,
                downsample=downsample, drop_path_rates=stage_dpr[i], **stage_args)
            ]
            self.num_features = stage_out_chs[i]
            current_stride *= 2 if downsample else 1
            self.feature_info += [dict(num_chs=self.num_features, reduction=current_stride, module=f'stages.{i}')]

        self.stages = nn.Sequential(*stages)

        self.head = ClassifierHead(self.num_features, num_classes, pool_type=global_pool, drop_rate=drop_rate)

        # Weight init; module names are not needed, so iterate modules() directly.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1.)
                nn.init.constant_(m.bias, 0.)
            elif isinstance(m, nn.Linear):
                nn.init.zeros_(m.bias)

    def get_classifier(self):
        """Return the `fc` module of the classifier head."""
        return self.head.fc

    def reset_classifier(self, num_classes, global_pool='avg'):
        """Replace the head with a new `ClassifierHead` for `num_classes` outputs."""
        # keep the recorded class count in sync with the new head
        self.num_classes = num_classes
        self.head = ClassifierHead(self.num_features, num_classes, pool_type=global_pool, drop_rate=self.drop_rate)

    def forward_features(self, x):
        """Run the stem and all stages, returning the last stage's output."""
        x = self.stem(x)
        return self.stages(x)

    def forward(self, x):
        x = self.forward_features(x)
        return self.head(x)
def _create_vovnet(variant, pretrained=False, **kwargs):
    """Build a `VovNet` for `variant` through `build_model_with_cfg`.

    The variant name selects both the default config and the architecture config; any extra
    keyword arguments are forwarded unchanged.
    """
    builder_args = dict(
        default_cfg=default_cfgs[variant],
        model_cfg=model_cfgs[variant],
        feature_cfg=dict(flatten_sequential=True),
    )
    return build_model_with_cfg(VovNet, variant, pretrained, **builder_args, **kwargs)
def vovnet39a(pretrained=False, **kwargs):
    """Create the `vovnet39a` variant; extra kwargs are forwarded to the model builder."""
    variant = 'vovnet39a'
    return _create_vovnet(variant, pretrained=pretrained, **kwargs)
def vovnet57a(pretrained=False, **kwargs):
    """Create the `vovnet57a` variant; extra kwargs are forwarded to the model builder."""
    variant = 'vovnet57a'
    return _create_vovnet(variant, pretrained=pretrained, **kwargs)
def ese_vovnet19b_slim_dw(pretrained=False, **kwargs):
    """Create the `ese_vovnet19b_slim_dw` variant; extra kwargs are forwarded to the model builder."""
    variant = 'ese_vovnet19b_slim_dw'
    return _create_vovnet(variant, pretrained=pretrained, **kwargs)
def ese_vovnet19b_dw(pretrained=False, **kwargs):
    """Create the `ese_vovnet19b_dw` variant; extra kwargs are forwarded to the model builder."""
    variant = 'ese_vovnet19b_dw'
    return _create_vovnet(variant, pretrained=pretrained, **kwargs)
def ese_vovnet19b_slim(pretrained=False, **kwargs):
    """Create the `ese_vovnet19b_slim` variant; extra kwargs are forwarded to the model builder."""
    variant = 'ese_vovnet19b_slim'
    return _create_vovnet(variant, pretrained=pretrained, **kwargs)
def ese_vovnet39b(pretrained=False, **kwargs):
    """Create the `ese_vovnet39b` variant; extra kwargs are forwarded to the model builder."""
    variant = 'ese_vovnet39b'
    return _create_vovnet(variant, pretrained=pretrained, **kwargs)
def ese_vovnet57b(pretrained=False, **kwargs):
    """Create the `ese_vovnet57b` variant; extra kwargs are forwarded to the model builder."""
    variant = 'ese_vovnet57b'
    return _create_vovnet(variant, pretrained=pretrained, **kwargs)
def ese_vovnet99b(pretrained=False, **kwargs):
    """Create the `ese_vovnet99b` variant; extra kwargs are forwarded to the model builder."""
    variant = 'ese_vovnet99b'
    return _create_vovnet(variant, pretrained=pretrained, **kwargs)
def eca_vovnet39b(pretrained=False, **kwargs):
    """Create the `eca_vovnet39b` variant; extra kwargs are forwarded to the model builder."""
    variant = 'eca_vovnet39b'
    return _create_vovnet(variant, pretrained=pretrained, **kwargs)
| # Experimental Models | |
def ese_vovnet39b_evos(pretrained=False, **kwargs):
    """Create the experimental `ese_vovnet39b_evos` variant.

    Every norm layer is produced by `create_norm_act('EvoNormSample', ...)` with `jit=False`;
    extra kwargs are forwarded to the model builder.
    """
    def evo_norm_act(num_features, **nkwargs):
        # adapter so the model can construct the layer from the feature count alone
        return create_norm_act('EvoNormSample', num_features, jit=False, **nkwargs)

    variant = 'ese_vovnet39b_evos'
    return _create_vovnet(variant, pretrained=pretrained, norm_layer=evo_norm_act, **kwargs)
def ese_vovnet99b_iabn(pretrained=False, **kwargs):
    """Create the experimental `ese_vovnet99b_iabn` variant.

    Uses the norm layer returned by `get_norm_act_layer('iabn')` together with `nn.LeakyReLU`
    as the activation; extra kwargs are forwarded to the model builder.
    """
    iabn_layer = get_norm_act_layer('iabn')
    layer_args = dict(norm_layer=iabn_layer, act_layer=nn.LeakyReLU)
    return _create_vovnet('ese_vovnet99b_iabn', pretrained=pretrained, **layer_args, **kwargs)