# Hyperparameter settings stored as flat key=value pairs (no section headers).
# NOTE(review): the parser that reads this file is not visible here. The quoted
# string, bracketed list and lowercase boolean below look TOML-compatible, and
# these notes assume full-line "#" comments are accepted — confirm before merging.
# Per-key notes are inferred from key names only; verify against the consuming code.

# Presumably the dropout rate applied inside attention — TODO confirm.
attn_drop=0.2
# Presumably the number of examples per batch.
batch_size=1024
# Presumably a shuffle/prefetch buffer size — TODO confirm against the data pipeline.
buffer_size=2048
# NOTE(review): what is chunked, and in which units, is not evident from this file.
chunk_size=8
# Dataset identifier; how the name is resolved is up to the consumer.
dataset="cifar10"
# Presumably the embedding width of the model.
embed_dim=128
# Presumably the number of training epochs.
epochs=100
# Presumably the dropout rate applied in the feed-forward layers — TODO confirm.
ffn_drop=0.2
# NOTE(review): 48 differs from the 32x32 spatial size in input_shape below;
# presumably inputs are resized before the model — confirm this is intentional.
image_size=48
# Presumably (height, width, channels) of the raw input — TODO confirm axis order.
input_shape=[32, 32, 3]
# Same numeric value as weight_decay at the end of this file.
learning_rate=0.0001
# Presumably toggles mixed-precision training.
mixed_precision=true
# Presumably the number of output classes.
num_classes=10
# Presumably the number of attention heads.
num_heads=1
# Presumably the number of stacked model layers.
num_layers=6
# Presumably the side length of square image patches; 4 divides both 32 and 48 evenly.
patch_size=4
# NOTE(review): single-letter key; its purpose is not evident from this file —
# verify against the consuming code.
r=2
# Presumably how many training examples are used (remainder possibly held out) — TODO confirm.
train_slice=40000
# Same numeric value as learning_rate above.
weight_decay=0.0001