diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml
index 4e1ef42d2..3bfabfc12 100644
--- a/.github/workflows/python-publish.yml
+++ b/.github/workflows/python-publish.yml
@@ -1,11 +1,16 @@
-# This workflows will upload a Python Package using Twine when a release is created
+# This workflow will upload a Python Package using Twine when a release is created
 # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
 
+# This workflow uses actions that are not certified by GitHub.
+# They are provided by a third-party and are governed by
+# separate terms of service, privacy policy, and support
+# documentation.
+
 name: Upload Python Package
 
 on:
   release:
-    types: [created]
+    types: [published]
 
 jobs:
   deploy:
@@ -21,11 +26,11 @@ jobs:
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
-        pip install setuptools wheel twine
-    - name: Build and publish
-      env:
-        TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
-        TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
-      run: |
-        python setup.py sdist bdist_wheel
-        twine upload dist/*
+        pip install build
+    - name: Build package
+      run: python -m build
+    - name: Publish package
+      uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
+      with:
+        user: __token__
+        password: ${{ secrets.PYPI_API_TOKEN }}
diff --git a/byol_pytorch/byol_pytorch.py b/byol_pytorch/byol_pytorch.py
index 05e85b976..d750bd6d6 100644
--- a/byol_pytorch/byol_pytorch.py
+++ b/byol_pytorch/byol_pytorch.py
@@ -5,6 +5,7 @@
 import torch
 from torch import nn
 import torch.nn.functional as F
+import torch.distributed as dist
 
 from torchvision import transforms as T
 
@@ -37,6 +38,10 @@ def set_requires_grad(model, val):
     for p in model.parameters():
         p.requires_grad = val
 
+def MaybeSyncBatchnorm(is_distributed = None):
+    is_distributed = default(is_distributed, dist.is_initialized() and dist.get_world_size() > 1)
+    return nn.SyncBatchNorm if is_distributed else nn.BatchNorm1d
+
 # loss fn
 
 def loss_fn(x, y):
@@ -75,24 +80,24 @@ def update_moving_average(ema_updater, ma_model, current_model):
 
 # MLP class for projector and predictor
 
-def MLP(dim, projection_size, hidden_size=4096):
+def MLP(dim, projection_size, hidden_size=4096, sync_batchnorm=None):
     return nn.Sequential(
         nn.Linear(dim, hidden_size),
-        nn.BatchNorm1d(hidden_size),
+        MaybeSyncBatchnorm(sync_batchnorm)(hidden_size),
         nn.ReLU(inplace=True),
         nn.Linear(hidden_size, projection_size)
     )
 
-def SimSiamMLP(dim, projection_size, hidden_size=4096):
+def SimSiamMLP(dim, projection_size, hidden_size=4096, sync_batchnorm=None):
     return nn.Sequential(
         nn.Linear(dim, hidden_size, bias=False),
-        nn.BatchNorm1d(hidden_size),
+        MaybeSyncBatchnorm(sync_batchnorm)(hidden_size),
         nn.ReLU(inplace=True),
         nn.Linear(hidden_size, hidden_size, bias=False),
-        nn.BatchNorm1d(hidden_size),
+        MaybeSyncBatchnorm(sync_batchnorm)(hidden_size),
         nn.ReLU(inplace=True),
         nn.Linear(hidden_size, projection_size, bias=False),
-        nn.BatchNorm1d(projection_size, affine=False)
+        MaybeSyncBatchnorm(sync_batchnorm)(projection_size, affine=False)
     )
 
 # a wrapper class for the base neural network
@@ -100,7 +105,7 @@ def SimSiamMLP(dim, projection_size, hidden_size=4096):
 # and pipe it into the projecter and predictor nets
 
 class NetWrapper(nn.Module):
-    def __init__(self, net, projection_size, projection_hidden_size, layer = -2, use_simsiam_mlp = False):
+    def __init__(self, net, projection_size, projection_hidden_size, layer = -2, use_simsiam_mlp = False, sync_batchnorm = None):
         super().__init__()
         self.net = net
         self.layer = layer
@@ -110,6 +115,7 @@ def __init__(self, net, projection_size, projection_hidden_size, layer = -2, use
         self.projection_hidden_size = projection_hidden_size
 
         self.use_simsiam_mlp = use_simsiam_mlp
+        self.sync_batchnorm = sync_batchnorm
 
         self.hidden = {}
         self.hook_registered = False
@@ -137,7 +143,7 @@ def _register_hook(self):
     def _get_projector(self, hidden):
         _, dim = hidden.shape
         create_mlp_fn = MLP if not self.use_simsiam_mlp else SimSiamMLP
-        projector = create_mlp_fn(dim, self.projection_size, self.projection_hidden_size)
+        projector = create_mlp_fn(dim, self.projection_size, self.projection_hidden_size, sync_batchnorm = self.sync_batchnorm)
         return projector.to(hidden)
 
     def get_representation(self, x):
@@ -178,7 +184,8 @@ def __init__(
         augment_fn = None,
         augment_fn2 = None,
         moving_average_decay = 0.99,
-        use_momentum = True
+        use_momentum = True,
+        sync_batchnorm = None
     ):
         super().__init__()
         self.net = net
@@ -205,7 +212,14 @@ def __init__(
         self.augment1 = default(augment_fn, DEFAULT_AUG)
         self.augment2 = default(augment_fn2, self.augment1)
 
-        self.online_encoder = NetWrapper(net, projection_size, projection_hidden_size, layer=hidden_layer, use_simsiam_mlp=not use_momentum)
+        self.online_encoder = NetWrapper(
+            net,
+            projection_size,
+            projection_hidden_size,
+            layer = hidden_layer,
+            use_simsiam_mlp = not use_momentum,
+            sync_batchnorm = sync_batchnorm
+        )
 
         self.use_momentum = use_momentum
         self.target_encoder = None
diff --git a/setup.py b/setup.py
index 59303a342..86e0cbd01 100644
--- a/setup.py
+++ b/setup.py
@@ -3,12 +3,13 @@
 setup(
   name = 'byol-pytorch',
   packages = find_packages(exclude=['examples']),
-  version = '0.6.0',
+  version = '0.7.0',
   license='MIT',
   description = 'Self-supervised contrastive learning made simple',
   author = 'Phil Wang',
   author_email = 'lucidrains@gmail.com',
   url = 'https://github.com/lucidrains/byol-pytorch',
+  long_description_content_type = 'text/markdown',
   keywords = [
     'self-supervised learning',
     'artificial intelligence'
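
The diff above threads a new `sync_batchnorm` flag from the `BYOL` constructor through `NetWrapper` into the projector and predictor MLPs, swapping `nn.BatchNorm1d` for `nn.SyncBatchNorm` when training spans multiple processes. A minimal usage sketch follows; the ResNet backbone, `image_size`, and `hidden_layer` values are illustrative (taken from the repo README), and the snippet assumes a torch.distributed process group has already been initialized (e.g. via torchrun) so SyncBatchNorm has peers to all-reduce batch statistics with.

    import torch
    from torchvision import models
    from byol_pytorch import BYOL

    # assumption: dist.init_process_group(...) has already run (e.g. under torchrun);
    # without an initialized process group, leave sync_batchnorm at its default
    resnet = models.resnet50(pretrained = True)

    learner = BYOL(
        resnet,
        image_size = 256,
        hidden_layer = 'avgpool',
        sync_batchnorm = True   # projector/predictor batchnorm syncs stats across GPUs
    )

    images = torch.randn(4, 3, 256, 256)
    loss = learner(images)      # forward pass returns the BYOL loss
    loss.backward()

Left at its default of `None`, the flag is resolved by `MaybeSyncBatchnorm`, which falls back to plain `nn.BatchNorm1d` unless `dist.is_initialized()` is true and the world size exceeds one, so single-GPU runs are unaffected.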