Hi there, I'm trying to reproduce the experimental results (Inception Score and FID) on the STL-10 dataset.
As described in the paper, the image resolution for STL-10 is 48x48.
However, this repo doesn't provide a model definition at 48x48 (for either D or G).
So I created my own D and G at 48x48 resolution, as below:
D
```python
import chainer
from chainer import functions as F
from source.links.sn_embed_id import SNEmbedID
from source.links.sn_linear import SNLinear
from dis_models.resblocks import Block, OptimizedBlock


class SNResNetProjectionDiscriminator(chainer.Chain):
    def __init__(self, ch=64, n_classes=0, activation=F.relu):
        super(SNResNetProjectionDiscriminator, self).__init__()
        self.activation = activation
        with self.init_scope():
            # Spatial path for a 48x48 input: 48 -> 24 -> 12 -> 6 -> 3 -> 3
            self.block1 = OptimizedBlock(3, ch)
            self.block2 = Block(ch, 2 * ch, activation=activation, downsample=True)
            self.block3 = Block(2 * ch, 4 * ch, activation=activation, downsample=True)
            self.block4 = Block(4 * ch, 8 * ch, activation=activation, downsample=True)
            self.block5 = Block(8 * ch, 16 * ch, activation=activation, downsample=False)
            self.l5 = SNLinear(16 * ch, 1, initialW=chainer.initializers.GlorotUniform(), nobias=True)
            if n_classes > 0:
                # The embedding width must match the pooled feature width (16 * ch),
                # not ch, or the projection term w_y * h below is shape-mismatched.
                self.l_y = SNEmbedID(n_classes, 16 * ch, initialW=chainer.initializers.GlorotUniform())

    def __call__(self, x, y=None):
        h = x
        h = self.block1(h)
        h = self.block2(h)
        h = self.block3(h)
        h = self.block4(h)
        h = self.block5(h)
        h = self.activation(h)
        # Global sum pooling over the spatial dimensions
        h = F.sum(h, axis=(2, 3))
        output = self.l5(h)
        if y is not None:
            # Projection discriminator: add the inner product between
            # the class embedding and the pooled features
            w_y = self.l_y(y)
            output += F.sum(w_y * h, axis=1, keepdims=True)
        return output
```
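As a quick sanity check (a minimal sketch of my own, not from the repo), pushing a dummy 48x48 batch through this D should give one logit per image:

```python
# Hypothetical shape check, assuming the D definition above is importable.
import numpy as np

dis = SNResNetProjectionDiscriminator(ch=64, n_classes=0)
x = np.random.randn(4, 3, 48, 48).astype(np.float32)  # dummy batch of four 48x48 RGB images
out = dis(x)  # 48 -> 24 -> 12 -> 6 -> 3, then global sum pooling
print(out.shape)  # (4, 1): one unconstrained logit per image
```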
G
```python
import chainer
import chainer.links as L
from chainer import functions as F
from gen_models.resblocks import Block
from source.miscs.random_samples import sample_categorical, sample_continuous


class ResNetGenerator(chainer.Chain):
    def __init__(self, ch=512, dim_z=128, bottom_width=6, activation=F.relu, n_classes=0, distribution="normal"):
        super(ResNetGenerator, self).__init__()
        self.bottom_width = bottom_width
        self.activation = activation
        self.distribution = distribution
        self.dim_z = dim_z
        self.n_classes = n_classes
        with self.init_scope():
            self.l1 = L.Linear(dim_z, (bottom_width ** 2) * ch, initialW=chainer.initializers.GlorotUniform())
            # Three upsampling blocks: 6x6 -> 12x12 -> 24x24 -> 48x48
            self.block2 = Block(ch, ch // 2, activation=activation, upsample=True, n_classes=n_classes)
            self.block3 = Block(ch // 2, ch // 4, activation=activation, upsample=True, n_classes=n_classes)
            self.block4 = Block(ch // 4, ch // 8, activation=activation, upsample=True, n_classes=n_classes)
            self.b5 = L.BatchNormalization(ch // 8)
            self.c5 = L.Convolution2D(ch // 8, 3, ksize=3, stride=1, pad=1, initialW=chainer.initializers.GlorotUniform())

    def sample_z(self, batchsize=64):
        return sample_continuous(self.dim_z, batchsize, distribution=self.distribution, xp=self.xp)

    def sample_y(self, batchsize=64):
        return sample_categorical(self.n_classes, batchsize, distribution="uniform", xp=self.xp)

    def __call__(self, batchsize=64, z=None, y=None):
        if z is None:
            z = sample_continuous(self.dim_z, batchsize, distribution=self.distribution, xp=self.xp)
        if y is None:
            y = sample_categorical(self.n_classes, batchsize, distribution="uniform", xp=self.xp) \
                if self.n_classes > 0 else None
        if (y is not None) and z.shape[0] != y.shape[0]:
            raise ValueError('z.shape[0] != y.shape[0]')
        h = z
        h = self.l1(h)
        h = F.reshape(h, (h.shape[0], -1, self.bottom_width, self.bottom_width))
        h = self.block2(h, y)
        h = self.block3(h, y)
        h = self.block4(h, y)
        h = self.b5(h)
        h = self.activation(h)
        h = F.tanh(self.c5(h))
        return h
```
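Likewise for G (again just a sketch): the three upsampling blocks take the 6x6 seed through 12x12 and 24x24 to 48x48, which is easy to confirm:

```python
# Hypothetical shape check for the generator above.
gen = ResNetGenerator(ch=512, dim_z=128, bottom_width=6, n_classes=0)
imgs = gen(batchsize=4)  # z is sampled internally; 6x6 -> 12x12 -> 24x24 -> 48x48
print(imgs.shape)  # (4, 3, 48, 48), pixel values in [-1, 1] from the final tanh
```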
All other hyper-parameters strictly follow the original paper. The yaml configuration is:
I trained my model on a 2-GPU workstation, with each GPU handling 128 images per iteration,
so the effective batch size is 128 x 2 = 256,
while in your paper the effective batch size is 64 x 4.
With this configuration, the best IS I can get is 7.8, far below the 9.10 reported in your paper.
Could someone give some tips on how to reproduce the 9.10 IS from the paper?