Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Can't run launch_I128_bs256x4.sh #8

Open
arnaghizadeh opened this issue Oct 6, 2020 · 0 comments
Open

Can't run launch_I128_bs256x4.sh #8

arnaghizadeh opened this issue Oct 6, 2020 · 0 comments

Comments

@arnaghizadeh
Copy link

Hi, I could run launch_C10.sh but not launch_I128_bs256x4.sh. I get the following error — can you please help?

 sh scripts/mlaunch_I128_bs256x4.sh 
{'dataset': 'I128_hdf5', 'augment': False, 'num_workers': 8, 'pin_memory': True, 'shuffle': True, 'load_in_mem': False, 'use_multiepoch_sampler': True, 'dict_decay': 0.8, 'commitment': 15.0, 'discrete_layer': '0123', 'dict_size': 10, 'model': 'BigGAN', 'G_param': 'SN', 'D_param': 'SN', 'G_ch': 64, 'D_ch': 64, 'G_depth': 1, 'D_depth': 1, 'D_wide': True, 'G_shared': False, 'shared_dim': 0, 'dim_z': 120, 'z_var': 1.0, 'hier': True, 'cross_replica': False, 'mybn': False, 'G_nl': 'inplace_relu', 'D_nl': 'inplace_relu', 'G_attn': '64', 'D_attn': '64', 'norm_style': 'bn', 'seed': 0, 'G_init': 'ortho', 'D_init': 'ortho', 'skip_init': False, 'G_lr': 0.0001, 'D_lr': 0.0004, 'G_B1': 0.0, 'D_B1': 0.0, 'G_B2': 0.999, 'D_B2': 0.999, 'batch_size': 256, 'G_batch_size': 0, 'num_G_accumulations': 4, 'num_D_steps': 1, 'num_D_accumulations': 4, 'split_D': False, 'num_epochs': 100, 'parallel': True, 'G_fp16': False, 'D_fp16': False, 'D_mixed_precision': False, 'G_mixed_precision': False, 'accumulate_stats': False, 'num_standing_accumulations': 16, 'G_eval_mode': False, 'save_every': 1000, 'num_save_copies': 2, 'num_best_copies': 5, 'which_best': 'FID', 'no_fid': False, 'test_every': 1000, 'num_inception_images': 50000, 'hashname': False, 'base_root': '', 'data_root': '/filer/tmp2/an499_tmp2', 'weights_root': 'weights', 'logs_root': 'logs', 'samples_root': 'samples', 'pbar': 'mine', 'name_suffix': 'quant', 'experiment_name': '', 'config_from_name': False, 'ema': True, 'ema_decay': 0.9999, 'use_ema': True, 'ema_start': 20000, 'adam_eps': 1e-06, 'BN_eps': 1e-05, 'SN_eps': 1e-06, 'num_G_SVs': 1, 'num_D_SVs': 1, 'num_G_SV_itrs': 1, 'num_D_SV_itrs': 1, 'G_ortho': 0.0, 'D_ortho': 0.0, 'toggle_grads': True, 'which_train_fn': 'GAN', 'load_weights': '', 'resume': False, 'logstyle': '%3.3e', 'log_G_spectra': False, 'log_D_spectra': False, 'sv_log_interval': 10}
Experiment name is BigGAN_I128_hdf5_seed0_Gch64_Dch64_bs256_nDa4_nGa4_Gattn64_Dattn64_Commit15.00_Layer0123_Dicsz10_Dicdecay0.80_quant
Adding attention layer in G at resolution 64
Param count for Gs initialized parameters: 40247811
Adding attention layer in D at resolution 64
Param count for Ds initialized parameters: 39448257
Preparing EMA for G with decay of 0.9999
Adding attention layer in G at resolution 64
Initializing EMA parameters to be source parameters...
Generator(
  (activation): ReLU(inplace=True)
  (shared): identity()
  (linear): SNLinear(in_features=20, out_features=16384, bias=True)
  (blocks): ModuleList(
    (0): ModuleList(
      (0): GBlock(
        (activation): ReLU(inplace=True)
        (conv1): SNConv2d(1024, 1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (conv2): SNConv2d(1024, 1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (conv_sc): SNConv2d(1024, 1024, kernel_size=(1, 1), stride=(1, 1))
        (bn1): ccbn(
          out: 1024, in: 1000, cross_replica=False
          (gain): Embedding(1000, 1024)
          (bias): Embedding(1000, 1024)
        )
        (bn2): ccbn(
          out: 1024, in: 1000, cross_replica=False
          (gain): Embedding(1000, 1024)
          (bias): Embedding(1000, 1024)
        )
      )
    )
    (1): ModuleList(
      (0): GBlock(
        (activation): ReLU(inplace=True)
        (conv1): SNConv2d(1024, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (conv2): SNConv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (conv_sc): SNConv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1))
        (bn1): ccbn(
          out: 1024, in: 1000, cross_replica=False
          (gain): Embedding(1000, 1024)
          (bias): Embedding(1000, 1024)
        )
        (bn2): ccbn(
          out: 512, in: 1000, cross_replica=False
          (gain): Embedding(1000, 512)
          (bias): Embedding(1000, 512)
        )
      )
    )
    (2): ModuleList(
      (0): GBlock(
        (activation): ReLU(inplace=True)
        (conv1): SNConv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (conv2): SNConv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (conv_sc): SNConv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
        (bn1): ccbn(
          out: 512, in: 1000, cross_replica=False
          (gain): Embedding(1000, 512)
          (bias): Embedding(1000, 512)
        )
        (bn2): ccbn(
          out: 256, in: 1000, cross_replica=False
          (gain): Embedding(1000, 256)
          (bias): Embedding(1000, 256)
        )
      )
    )
    (3): ModuleList(
      (0): GBlock(
        (activation): ReLU(inplace=True)
        (conv1): SNConv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (conv2): SNConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (conv_sc): SNConv2d(256, 128, kernel_size=(1, 1), stride=(1, 1))
        (bn1): ccbn(
          out: 256, in: 1000, cross_replica=False
          (gain): Embedding(1000, 256)
          (bias): Embedding(1000, 256)
        )
        (bn2): ccbn(
          out: 128, in: 1000, cross_replica=False
          (gain): Embedding(1000, 128)
          (bias): Embedding(1000, 128)
        )
      )
      (1): Attention(
        (theta): SNConv2d(128, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (phi): SNConv2d(128, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (g): SNConv2d(128, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (o): SNConv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
      )
    )
    (4): ModuleList(
      (0): GBlock(
        (activation): ReLU(inplace=True)
        (conv1): SNConv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (conv2): SNConv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (conv_sc): SNConv2d(128, 64, kernel_size=(1, 1), stride=(1, 1))
        (bn1): ccbn(
          out: 128, in: 1000, cross_replica=False
          (gain): Embedding(1000, 128)
          (bias): Embedding(1000, 128)
        )
        (bn2): ccbn(
          out: 64, in: 1000, cross_replica=False
          (gain): Embedding(1000, 64)
          (bias): Embedding(1000, 64)
        )
      )
    )
  )
  (output_layer): Sequential(
    (0): bn()
    (1): ReLU(inplace=True)
    (2): SNConv2d(64, 3, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  )
)
Discriminator(
  (activation): ReLU(inplace=True)
  (blocks): ModuleList(
    (0): ModuleList(
      (0): DBlock(
        (activation): ReLU(inplace=True)
        (downsample): AvgPool2d(kernel_size=2, stride=2, padding=0)
        (conv1): SNConv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (conv2): SNConv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (conv_sc): SNConv2d(3, 64, kernel_size=(1, 1), stride=(1, 1))
      )
      (1): Quantize()
      (2): Attention(
        (theta): SNConv2d(64, 8, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (phi): SNConv2d(64, 8, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (g): SNConv2d(64, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (o): SNConv2d(32, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      )
    )
    (1): ModuleList(
      (0): DBlock(
        (activation): ReLU(inplace=True)
        (downsample): AvgPool2d(kernel_size=2, stride=2, padding=0)
        (conv1): SNConv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (conv2): SNConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (conv_sc): SNConv2d(64, 128, kernel_size=(1, 1), stride=(1, 1))
      )
      (1): Quantize()
    )
    (2): ModuleList(
      (0): DBlock(
        (activation): ReLU(inplace=True)
        (downsample): AvgPool2d(kernel_size=2, stride=2, padding=0)
        (conv1): SNConv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (conv2): SNConv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (conv_sc): SNConv2d(128, 256, kernel_size=(1, 1), stride=(1, 1))
      )
      (1): Quantize()
    )
    (3): ModuleList(
      (0): DBlock(
        (activation): ReLU(inplace=True)
        (downsample): AvgPool2d(kernel_size=2, stride=2, padding=0)
        (conv1): SNConv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (conv2): SNConv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (conv_sc): SNConv2d(256, 512, kernel_size=(1, 1), stride=(1, 1))
      )
      (1): Quantize()
    )
    (4): ModuleList(
      (0): DBlock(
        (activation): ReLU(inplace=True)
        (downsample): AvgPool2d(kernel_size=2, stride=2, padding=0)
        (conv1): SNConv2d(512, 1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (conv2): SNConv2d(1024, 1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (conv_sc): SNConv2d(512, 1024, kernel_size=(1, 1), stride=(1, 1))
      )
    )
    (5): ModuleList(
      (0): DBlock(
        (activation): ReLU(inplace=True)
        (conv1): SNConv2d(1024, 1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (conv2): SNConv2d(1024, 1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      )
    )
  )
  (linear): SNLinear(in_features=1024, out_features=1, bias=True)
  (embed): SNEmbedding(1000, 1024)
)
Number of params in G: 40247940 D: 39448258
Inception Metrics will be saved to logs/BigGAN_I128_hdf5_seed0_Gch64_Dch64_bs256_nDa4_nGa4_Gattn64_Dattn64_Commit15.00_Layer0123_Dicsz10_Dicdecay0.80_quant_log.jsonl
Training Metrics will be saved to logs/BigGAN_I128_hdf5_seed0_Gch64_Dch64_bs256_nDa4_nGa4_Gattn64_Dattn64_Commit15.00_Layer0123_Dicsz10_Dicdecay0.80_quant
Using dataset root location /filer/tmp2/an499_tmp2/ILSVRC128.hdf5
Using multiepoch sampler from start_itr 0...
Parallelizing Inception module...
Beginning training at epoch 0...
Length dataset output is 5000000
1/4883 (  0.00%) Traceback (most recent call last):
  File "train.py", line 227, in <module>
    main()
  File "train.py", line 224, in main
    run(config)
  File "train.py", line 184, in run
    metrics = train(x, y)
  File "/common/users/an499/papers/GGAN/GGAN_code/FQ-GAN/FQ-BigGAN/train_fns.py", line 45, in train
    x[counter], y[counter], train_G=False, split_D=config['split_D'])
  File "/common/users/an499/py36/lib/python3.6/site-packages/torch/nn/modules/module.py", line 532, in __call__
    result = self.forward(*input, **kwargs)
  File "/common/users/an499/py36/lib/python3.6/site-packages/torch/nn/parallel/data_parallel.py", line 152, in forward
    outputs = self.parallel_apply(replicas, inputs, kwargs)
  File "/common/users/an499/py36/lib/python3.6/site-packages/torch/nn/parallel/data_parallel.py", line 162, in parallel_apply
    return parallel_apply(replicas, inputs, kwargs, self.device_ids[:len(replicas)])
  File "/common/users/an499/py36/lib/python3.6/site-packages/torch/nn/parallel/parallel_apply.py", line 85, in parallel_apply
    output.reraise()
  File "/common/users/an499/py36/lib/python3.6/site-packages/torch/_utils.py", line 394, in reraise
    raise self.exc_type(msg)
IndexError: Caught IndexError in replica 0 on device 0.
Original Traceback (most recent call last):
  File "/common/users/an499/py36/lib/python3.6/site-packages/torch/nn/parallel/parallel_apply.py", line 60, in _worker
    output = module(*input, **kwargs)
  File "/common/users/an499/py36/lib/python3.6/site-packages/torch/nn/modules/module.py", line 532, in __call__
    result = self.forward(*input, **kwargs)
  File "/common/users/an499/papers/GGAN/GGAN_code/FQ-GAN/FQ-BigGAN/BigGAN.py", line 441, in forward
    G_z = self.G(z, self.G.shared(gy))
  File "/common/users/an499/py36/lib/python3.6/site-packages/torch/nn/modules/module.py", line 532, in __call__
    result = self.forward(*input, **kwargs)
  File "/common/users/an499/papers/GGAN/GGAN_code/FQ-GAN/FQ-BigGAN/BigGAN.py", line 240, in forward
    ys = [torch.cat([y, item], 1) for item in zs[1:]]
  File "/common/users/an499/papers/GGAN/GGAN_code/FQ-GAN/FQ-BigGAN/BigGAN.py", line 240, in <listcomp>
    ys = [torch.cat([y, item], 1) for item in zs[1:]]
IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant