This repository has been archived by the owner on Jan 1, 2025. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 71
/
Copy path4_example_mae.py
61 lines (51 loc) · 1.52 KB
/
4_example_mae.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
# --------------------------------------------------------
# References:
# mae: https://github.com/facebookresearch/mae
# --------------------------------------------------------
# MAE uses timm for its ViT implementation.
# This is an example of how to patch MAE with ToMe (make sure to set r somewhere too).
# Place these functions in https://github.com/facebookresearch/mae/blob/main/models_vit.py
import tome
def vit_base_patch16(**kwargs):
model = VisionTransformer(
patch_size=16,
embed_dim=768,
depth=12,
num_heads=12,
mlp_ratio=4,
qkv_bias=True,
norm_layer=partial(nn.LayerNorm, eps=1e-6),
**kwargs,
)
tome.patch.mae(model)
return model
def vit_large_patch16(**kwargs):
model = VisionTransformer(
patch_size=16,
embed_dim=1024,
depth=24,
num_heads=16,
mlp_ratio=4,
qkv_bias=True,
norm_layer=partial(nn.LayerNorm, eps=1e-6),
**kwargs,
)
tome.patch.mae(model)
return model
def vit_huge_patch14(**kwargs):
model = VisionTransformer(
patch_size=14,
embed_dim=1280,
depth=32,
num_heads=16,
mlp_ratio=4,
qkv_bias=True,
norm_layer=partial(nn.LayerNorm, eps=1e-6),
**kwargs,
)
tome.patch.mae(model)
return model