"""
Allows easy modification of all configuration parameters required to define,
train or evaluate a model.
This script is not intended to be run; it only describes parameters (see class constructors).
After building the config instances, the update_dynamic_config_params(...) function
must be called to update some "dynamic" hyper-parameters which depend on other parameters.
This configuration is used when running train.py as main.
When running train_queue.py, configuration changes are relative to this config.py file.
When a run starts, this file is stored as a config.json file. To ensure easy restoration of
parameters, please only use simple types such as strings, ints, floats, tuples (no lists) and dicts.
"""
import datetime
import pathlib
# The config_confidential.py file must be created by the user in the ./utils folder.
# It must contain the following fields:
# data_root_path, logs_root_dir,
# comet_api_key, comet_project_name, comet_workspace
from utils import config_confidential
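# A minimal ./utils/config_confidential.py sketch (every value below is a placeholder):
#     data_root_path = "/path/to/datasets"
#     logs_root_dir = "/path/to/logs"
#     comet_api_key = "YOUR_COMET_API_KEY"
#     comet_project_name = "your-project-name"
#     comet_workspace = "your-workspace"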
# ===================================================================================================================
# ================================================= Model configuration =============================================
# ===================================================================================================================
class ModelConfig:
def __init__(self):
# ----------------------------------------------- Data ---------------------------------------------------
self.data_root_path = config_confidential.data_root_path
self.logs_root_dir = config_confidential.logs_root_dir
self.name = "dev" # experiment base name
# experiment run: different hyperparams, optimizer, etc... for a given exp
self.run_name = 'autoeval_00'
self.pretrained_VAE_checkpoint = \
self.logs_root_dir + "/hvae/8x1_freebits0.250__6notes_dimz256/checkpoint.tar"
#self.logs_root_dir + "/hvae/8x1_freebits0.125__3notes_dimz256/checkpoint.tar"
# self.pretrained_VAE_checkpoint = None # Uncomment this to train a full model from scratch
self.allow_erase_run = True # If True, a previous run with identical name will be erased before training
# Comet.ml logger (replaces Tensorboard)
self.comet_api_key = config_confidential.comet_api_key
self.comet_project_name = config_confidential.comet_project_name
self.comet_workspace = config_confidential.comet_workspace
self.comet_experiment_key = 'xxxxxxxx' # Will be set by cometwriter.py after the experiment has been created
self.comet_tags = []
# ---------------------------------------- General Architecture --------------------------------------------
# See model/ladderencoder.py to view available architecture(s).
# Decoder architecture will be as symmetric as possible.
# 'specladder': ladder CNNs (cells, outputs from different levels) for spectrogram reconstruction
# also contains num of blocks and num of conv layers per block (e.g. 8x1)
# Arch args:
# '_adain' some BN layers are replaced by AdaIN (fed with a style vector w, dim_w < dim_z)
# '_att' self-attention in deep conv layers
# '_big' (small improvements but +50% GPU RAM usage), '_bigger'
# '_res' residual connections after each hidden strided conv layer (up/down sampling layers)
# '_depsep5x5' uses 5x5 depthwise-separable convolutional layers in each res block (requires at least 8x2)
# '_ln' uses LayerNorm instead of BatchNorm, '_wn' uses Weight Normalization attached to conv weights
# '_swish' uses Swish activations (SiLU) instead of LeakyReLU (negligible overhead)
self.vae_main_conv_architecture = 'specladder8x1_res_swish' # should be named "audio" architecture...
# Network plugged after sequential conv blocks (encoder) or before sequential conv blocks (decoder)
# E.g.: 'conv_1l_1x1' means regular convolution, 1 layer, 1x1 conv kernels
# 'lstm_2l_3x3' means ConvLSTM, 2 layers, 3x3 conv kernels
# other args: '_gated' (for regular convolutions) seems very effective to improve the overall ELBO
# '_att' adds self-attention conv residuals at the beginning of shallow latent cells
# '_posenc' adds input positional information to LSTMs and to self-attention conv layers
self.vae_latent_extract_architecture = 'conv_1l_k1x1_gated'
# Number of latent levels increases the size of the shallowest latent feature maps, and increases the
# minimal dim_z requirement
# 2 latent levels allows dim_z close to 100
# 3 latent levels allows dim_z close to 350
# 4 latent levels allows dim_z close to 1500
self.vae_latent_levels = 1 # Currently useless, must remain 1
# Sets the family of the decoder output probability distribution p_theta(x|z), e.g.:
# - 'gaussian_unitvariance' corresponds to the usual MSE reconstruction loss (up to a constant and factor)
self.audio_decoder_distribution = 'gaussian_unitvariance'
self.attention_gamma = 1.0 # Amount of self-attention added to (some) usual convolutional outputs
# Preset encoder/decoder architecture
# TODO description (base + options)
# '_ff': feed-forward, non-AR decoding - applicable to sequential models: RNN, Transformer (pos enc only)
# '_memmlp': doubles the number of Transformer decoder memory tokens using a "Res-MLP" on the latent vector
# -> seems to improve performance a bit (lower latent loss, quite similar auto synth prog losses)
# '_fftoken': learned positional embeddings (input tokens) for a non-autoregressive transformer decoder
self.vae_preset_architecture = 'tfm_6l_ff_memmlp_fftoken_embednorm' # tfm_6l_memmlp_ff
# "before_latent_cell" (encoded preset will be the same size as encoded audio, added then processed together)
# or "after_latent_cell"" (encoded preset size will be 2*dim_z, and will be added to z_mu and z_sigma)
self.vae_preset_encode_add = "after_latent_cell"
# Size of the hidden representation of 1 synth parameter
self.preset_hidden_size = 256
# Distribution for modeling (discrete-)numerical synth param values; available options:
# - 'logistic_mixtX' where X is the number of mixture components (mu, s and pi learned for each component)
# - 'softmax' will learn values as categories - custom softmax (cardinality) for each param
# (categorical variables always use a softmaxed categorical distribution)
self.preset_decoder_numerical_distribution = 'logistic_mixt3'
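# For reference, a logistic mixture over discrete values is commonly evaluated as in
# PixelCNN++ (the exact implementation used here may differ):
#     p(x) = sum_i pi_i * (sigmoid((x + d/2 - mu_i) / s_i) - sigmoid((x - d/2 - mu_i) / s_i))
# where d is the quantization step of the discrete-numerical synth parameter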
# Describes how (if) the presets should be auto-encoded:
# - "no_encoding": presets are inferred from audio but are not encoded (not provided at
# encoder input). Corresponds to a Sound Matching (or automatic synth programming) task.
# - "combined_vae": preset is encoded with audio, their hidden representations are then summed or mixed
# together
# - TODO "asp+vae": hybrid method/training: TODO DOC
# - "aligned_vaes": the preset VAE and audio VAE are trained as independent models, but a loss
# (e.g. contrastive, Dkl, ... TODO TBD) is computed using the two latent representations
# - "no_audio": the preset alone is auto-encoded, audio is discarded
self.preset_ae_method = "combined_vae"
# --------------------------------------------- Latent space -----------------------------------------------
# If True, the encoder output size is reduced by 2 such that 1 MIDI pitch and 1 velocity can be concatenated to the latent vector
self.concat_midi_to_z = None # See update_dynamic_config_params() - FIXME deprecated
# Latent space dimension ********* this dim is automatically set when using a Hierarchical VAE *************
self.dim_z = -1
self.approx_requested_dim_z = 256 # Hierarchical VAE will try to get close to this, will often be higher
# Latent flow architecture, e.g. 'realnvp_4l200' (4 flows, 200 hidden features per flow)
# - base architectures can be realnvp, maf, ...
# - set to None to disable latent space flow transforms: will build a BasicVAE or MMD-VAE
# - options: _BNinternal (batch norm between hidden MLPs, to compute transform coefficients),
# _BNbetween (between flow layers), _BNoutput (BN on the last two layers, or not)
self.latent_flow_arch = None # FIXME deprecated
# ------------------------------------------------ Audio -------------------------------------------------
# Spectrogram size cannot easily be modified - all CNN decoders would have to be re-written
self.note_duration = (3.0, 1.0)
self.sampling_rate = 16000 # 16000 for NSynth dataset compatibility
self.stft_args = (512, 256) # fft size and hop size
self.mel_bins = -1 # -1 disables Mel-scale spectrogram. Try: 257, 513, ...
# Spectrogram sizes @ 22.05 kHz:
# (513, 433): audio 5.0s, fft size 1024, fft hop 256
# (257, 347): audio 4.0s, fft size 512 (or fft 1024 w/ mel_bins 257), fft hop 256
# (513, 347): audio 4.0s, fft size 1024 (no mel), fft hop 256
# Sizes @ 16 kHz:
# (257, 251): audio 4.0s, fft size 512 (or fft 1024 w/ mel_bins 257), fft hop 256
self.spectrogram_size = (257, 251) # H x W. see data/dataset.py to retrieve this from audio/stft params
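# Sanity-check sketch for the default sizes above (data/dataset.py holds the exact computation;
# the frame count may shift by 1 depending on STFT padding):
#     n_freq_bins = stft_args[0] // 2 + 1                                  # 512 // 2 + 1 = 257
#     n_frames = 1 + int(sampling_rate * sum(note_duration)) // stft_args[1]
#     # 1 + 64000 // 256 = 251   (4.0s audio = 3.0s note + 1.0s release)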
self.mel_f_limits = (0, 8000) # min/max Mel-spectrogram frequencies (librosa default 0:Fs/2)
# All notes that must be available for each instrument (even if we currently use only a subset of those notes)
self.required_dataset_midi_notes = ((41, 75), (48, 75), (56, 75), (63, 75), (56, 25), (56, 127))
# Tuple of (pitch, velocity) tuples. Using only 1 midi note is fine.
# self.midi_notes = ((56, 75), ) # Reference note: G#3 , intensity 75/127
self.midi_notes = ((41, 75), (48, 75), (56, 25), (56, 75), (56, 127), (63, 75)) # 6 notes
# self.midi_notes = ((41, 75), (56, 25), (56, 75), (56, 127), (63, 75)) # 5 notes
# self.midi_notes = ((41, 75), (56, 75), (56, 127)) # 3 notes (faster training)
self.main_midi_note_index = len(self.midi_notes) // 2 # 56, 75
self.stack_spectrograms = True # If True, dataset will feed multi-channel spectrograms to the encoder
# If True, each preset is presented several times per epoch, such that the dataset size is increased
# (6x bigger with 6 MIDI notes) -> the numbers of training, warmup and patience epochs must be scaled down
self.increased_dataset_size = None # See update_dynamic_config_params()
self.spectrogram_min_dB = -120.0
self.input_audio_tensor_size = None # see update_dynamic_config_params()
# ---------------------------------- Synth (not used during pre-training) ----------------------------------
self.synth = 'dexed'
# Dexed-specific auto rename: '*' in 'al*_op*_lab*' will be replaced by the actual algos, operators and labels
self.synth_args_str = 'al*_op*_lab*' # Auto-generated string (see end of script)
self.synth_params_count = -1 # Will be set automatically - see data.build.get_full_and_split_datasets
# flags/values to describe the dataset to be used
self.dataset_labels = None # tuple of labels (e.g. ('harmonic', 'percussive')), or None to use all labels
# Dexed: Preset Algorithms, and activated Operators (Lists of ints, None to use all)
# Limited algorithms (non-symmetrical only): [1, 2, 7, 8, 9, 14, 28, 3, 4, 11, 16, 18]
# Other synth: ...?
self.dataset_synth_args = (None, [1, 2, 3, 4, 5, 6])
# Directory for saving metrics, samples, models, etc... see README.md
# ===================================================================================================================
# ======================================= Training procedure configuration ==========================================
# ===================================================================================================================
class TrainConfig:
def __init__(self):
self.pretrain_audio_only = False # Should we pre-train the audio+latent parts of the auto-encoder model only?
# Interpolation will be evaluated vs. a reference model, which must have been evaluated first
self.evaluate_interpolation_after_training = True # Parallel eval (long, CPU-intensive)
self.start_datetime = datetime.datetime.now().isoformat()
# 256 is okay for smaller conv structures - reduce to 64 to fit '_big' models into 24GB GPU RAM
self.minibatch_size = 64 # reduce for big models - also, a smaller batch size seems to improve VAE pretraining performance...
self.main_cuda_device_idx = 0 # CUDA device for nonparallel operations (losses, ...)
self.test_holdout_proportion = 0.1 # This can be reduced without mixing the train and test subsets
self.k_folds = 9 # 10% for validation set, 80% for training
self.current_k_fold = 0 # k-folds are not used anymore, but we'll keep the training/validation/test splits
self.start_epoch = 0 # 0 means a restart (previous data erased). If > 0: will load the last saved checkpoint
# Total number of epochs (including previous training epochs). 275 for StepLR regression model training
self.n_epochs = 170 # See update_dynamic_config_params()
# The max ratio between the number of items from each synth/instrument used for each training epoch (e.g. Dexed
# has more than 30x more instruments than NSynth). All available data will always be used for validation.
self.pretrain_synths_max_imbalance_ratio = 10.0 # Set to -1 to disable the weighted sampler.
self.attention_gamma_warmup_period = 50
# ------------------------------------------------ Losses -------------------------------------------------
# Reconstruction loss: 'MSE' corresponds to free-mean, fixed-variance per-pixel Gaussian prob distributions.
# TODO 'WeightedMSE' would allow giving a higher weight to some parts of the spectrograms (e.g. attack, low freqs, ??)
self.reconstruction_loss = 'MSE'
# Latent regularization loss: 'Dkl' or 'MMD' for Basic VAE, 'logprob' or 'MMD' loss with flow-VAE
# 'MMD_determ_enc' also available: use a deterministic encoder
self.latent_loss = 'Dkl'
self.mmd_compensation_factor = 5.0 # Factor applied to MMD backprop losses only
self.mmd_num_estimates = 1 # Number of MMD estimates per batch (maybe increase if small batch size)
# Loss normalization keeps all losses within the same order of magnitude, but the true ELBO is then
# no longer optimized. When un-normalized, the reconstruction loss (log-probability of a multivariate
# Gaussian) is orders of magnitude bigger than the other losses. Must remain True to ease convergence
# (the reconstruction loss is too big otherwise)
self.normalize_losses = True # Normalize reconstruction and regression losses over their dimension
# To allow comparing different latent sizes, the Dkl or MMD losses are not normalized, such that each
# latent coordinate always 'gets' the same amount of regularization
self.normalize_latent_loss = False
# Here, beta = beta_vae / Dx in the beta-VAE formulation (ICLR 2017)
# where Dx is the input dimensionality (257 * 251 = 64507 for 1 spectrogram)
# E.g. here: beta = 1 corresponds to beta_VAE = 6.5e+4
# The ELBO loss is obtained by using beta = 1.55e-5 (for 1 spectrogram)
self.beta = 5.0e-6 # FIXME With 6 specs, 1.6e-5 corresponds to beta=6
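# Worked example for the value above, assuming Dx = 64507 (a single 257x251 spectrogram):
#     beta_VAE = beta * Dx = 5.0e-6 * 64507 = approx. 0.32, i.e. weaker regularization than
#     the true ELBO, which would require beta = 1 / 64507 = approx. 1.55e-5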
# Should not be zero with Normalizing Flows or with a multi-layer structure for extracting latent values
# (risk of a very unstable training)
self.beta_start_value = 1.6e-8
# Number of warmup epochs to increase beta from start_value to its final value. TODO increase to reduce posterior collapse
self.beta_warmup_epochs = 50 # Used during both pre-train and fine-tuning
# VAE Kullback-Leibler divergence weighting during warmup, to try to prevent posterior collapse of some
# latent levels (see NVAE, NeurIPS 2020). Leads to higher latent losses during warmup.
self.dkl_auto_gamma = False # If True, latent groups with small minibatch-KLDs will be assigned a smaller loss
# Free-bits from the IAF paper (NeurIPS 16) https://arxiv.org/abs/1606.04934
# Our VAE uses 2D feature maps as latent variables, and entire channels seem to collapse
# (a single pixel collapsing has not been observed). The "free bits" min Dkl constraint is then applied
# to each latent channel, not to hierarchical latent groups
# TODO increase? collapse during fine-tuning TODO update doc
self.latent_free_bits = 0.250 # this is a *single-pixel* min KLD value
# - - - Synth parameters losses - - -
# - General options
# Factor applied to the preset loss. FIXME: needed because the MSE loss of the VAE is much lower (approx. 1e-2)
self.params_loss_compensation_factor = 0.5
self.params_loss_exclude_useless = False # if True, sets to 0.0 the loss related to 0-volume oscillators
self.params_loss_with_permutations = False # Backprop loss only; monitoring losses always use True
# - Cross-Entropy loss
self.preset_CE_label_smoothing = 0.0 # torch.nn.CrossEntropyLoss: label smoothing since PyTorch 1.10
self.preset_CE_use_weights = False
# Probability to use the model's outputs during training (AR decoder)
self.preset_sched_sampling_max_p = 0.0 # Set to zero for FF decoder
# self.preset_sched_sampling_start_epoch = 40 # TODO IMPLEMENT Required for the embeddings to train properly?
self.preset_sched_sampling_warmup_epochs = 100
# Alignment loss:
# - 'kld': q(z|preset) regularized towards q(z|audio) through KLD
# - 'symmetric_kld'
# TODO optional arg:
# - implement "__sgaudio" (stop audio-VAE gradient)
self.preset_alignment_criterion = 'symmetric_kld'
# ------------------------------------------- Optimizer + scheduler -------------------------------------------
# Different optimizer parameters can be used for the pre-trained AE and the regression networks
# (see below: 'ae' or 'reg' prefixes or dict keys)
self.optimizer = 'Adam'
self.adam_betas = (0.9, 0.999) # default (0.9, 0.999)
# Maximal learning rate (reached after warmup, then reduced on plateaus)
# The LR should be decreased when losses are non-normalized (they are expected to be approx. 9e4 times
# bigger with a 257x347 spectrogram). A 1e-9 LR with a 1e+4 (non-normalized) loss does not allow any
# training (vanishing gradients?)
self.initial_learning_rate = {'audio': 2e-4, 'latent': 2e-4, 'preset': 2e-4}
# FIXME RESET TO 1e-1?
self.initial_audio_latent_lr_factor_after_pretrain = 1.0 # audio-related LR reduced after pre-train
# Learning rate warmup (see https://arxiv.org/abs/1706.02677). Same warmup period for all schedulers.
self.lr_warmup_epochs = 20 # Will be decreased /2 during pre-training (stable CNN structure)
self.lr_warmup_start_factor = 0.05 # Will be increased 2x during pre-training
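# E.g. for the 'audio' LR above (assuming an approximately linear warmup schedule):
#     the LR starts at 0.05 * 2e-4 = 1e-5 and reaches 2e-4 after 20 epochs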
self.scheduler_name = 'StepLR' # can use ReduceLROnPlateau during pre-train (stable CNN), StepLR for reg model
self.scheduler_lr_factor = 0.4
# - - - StepLR scheduler options - - -
self.scheduler_period = 75 # resnets train quite fast
# Set a longer patience with smaller datasets and quite unstable trains
# See update_dynamic_config_params(). 16k samples dataset: set to 10
self.scheduler_patience = 20
self.scheduler_cooldown = 20
self.scheduler_threshold = 1e-4
# Training considered "dead" when dynamic LR reaches this ratio of a the initial LR
# Early stop is currently used for the regression loss only, for the 'ReduceLROnPlateau' scheduler only.
self.early_stop_lr_ratio = 1e-4
self.early_stop_lr_threshold = None # See update_dynamic_config_params()
# -------------------------------------------- Regularization -----------------------------------------------
# WD definitely helps regularization but significantly impairs results. 1e-4 seems to be a good compromise
# for both Basic and MMD VAEs (without regression net). 3e-6 allows for the lowest reconstruction error.
self.weight_decay = 1e-5
self.ae_fc_dropout = 0.0
# FIXME use dropout
self.preset_cat_dropout = 0.0 # Applied only to subnets which do not handle value regression tasks
self.preset_internal_dropout = 0.0
# -------------------------------------------- Logs, figures, ... ---------------------------------------------
self.validate_period = 5 # Period between validations (very long w/ autoregressive transformers)
self.plot_period = 20 # Period (in epochs) for plotting graphs into Tensorboard (quite CPU and SSD expensive)
self.large_plots_min_period = 100 # Min num of epochs between plots (e.g. embeddings, approx. 80MB .tsv files)
self.plot_epoch_0 = False
self.verbosity = 1 # 0: no console output --> 3: fully-detailed per-batch console output
self.init_security_pause = 0.0 # Short pause before erasing an existing run
# Number of logged audio and spectrograms for a given epoch
self.logged_samples_count = 4 # See update_dynamic_config_params()
# -------------------------------------- Performance and Profiling ------------------------------------------
self.dataloader_pin_memory = False
self.dataloader_persistent_workers = True
self.profiler_enabled = False
self.profiler_epoch_to_record = 0 # The profiler will record a few minibatches of this given epoch
self.profiler_kwargs = {'record_shapes': True, 'with_stack': True}
self.profiler_schedule_kwargs = {'skip_first': 5, 'wait': 1, 'warmup': 1, 'active': 3, 'repeat': 2}
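# Assuming these kwargs are passed to torch.profiler.schedule(...), the schedule above skips
# the first 5 steps, then runs the cycle [1 wait step, 1 warmup step, 3 recorded steps] twice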
def update_dynamic_config_params(model_config: ModelConfig, train_config: TrainConfig):
""" This function must be called before using any train attribute """
# TODO perform config coherence checks in this function
if train_config.pretrain_audio_only:
model_config.comet_tags.append('pretrain')
model_config.params_regression_architecture = 'None'
model_config.preset_ae_method = None
train_config.evaluate_interpolation_after_training = False
train_config.lr_warmup_epochs = train_config.lr_warmup_epochs // 2
train_config.lr_warmup_start_factor *= 2
else:
train_config.initial_learning_rate['audio'] *= train_config.initial_audio_latent_lr_factor_after_pretrain
train_config.initial_learning_rate['latent'] *= train_config.initial_audio_latent_lr_factor_after_pretrain
# stack_spectrograms must be False for 1-note datasets - security check
model_config.stack_spectrograms = model_config.stack_spectrograms and (len(model_config.midi_notes) > 1)
# Artificially increased data size? FIXME remove - we'll always stack now
model_config.increased_dataset_size = (len(model_config.midi_notes) > 1) and not model_config.stack_spectrograms
model_config.concat_midi_to_z = (len(model_config.midi_notes) > 1) and not model_config.stack_spectrograms
# Mini-batch size can be smaller for the last mini-batches and/or during evaluation
model_config.input_audio_tensor_size = \
(train_config.minibatch_size, 1 if not model_config.stack_spectrograms else len(model_config.midi_notes),
model_config.spectrogram_size[0], model_config.spectrogram_size[1])
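# E.g. with the default configs above: (64, 6, 257, 251), i.e. N x C x H x W,
# where C is the number of stacked MIDI notes (C = 1 when spectrograms are not stacked)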
# Dynamic train hyper-params
train_config.early_stop_lr_threshold = {k: lr * train_config.early_stop_lr_ratio
for k, lr in train_config.initial_learning_rate.items()}
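# E.g. with the default configs above: {'audio': 2e-8, 'latent': 2e-8, 'preset': 2e-8}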
train_config.logged_samples_count = max(train_config.logged_samples_count, len(model_config.midi_notes))
# Train hyper-params (epochs counts) that should be increased when using a subset of the dataset
if model_config.dataset_synth_args[0] is not None: # Limited Dexed algorithms? TODO handle non-dexed synth
train_config.n_epochs = 700
train_config.lr_warmup_epochs = 10
train_config.scheduler_patience = 10
train_config.scheduler_cooldown = 10
train_config.beta_warmup_epochs = 40
# Train hyper-params (epochs counts) that should be reduced with artificially increased datasets
# Augmented datasets introduce 6x more backprops <=> 6x more epochs. Patience and cooldown must however remain >= 2
if model_config.increased_dataset_size: # Stacked spectrograms do not increase the dataset size (number of items)
# FIXME handle the dicts
N = len(model_config.midi_notes) - 1 # reduce a bit less than the dataset's size increase
train_config.n_epochs = 1 + train_config.n_epochs // N
train_config.lr_warmup_epochs = 1 + train_config.lr_warmup_epochs // N
train_config.scheduler_patience = 1 + train_config.scheduler_patience // N
train_config.scheduler_cooldown = 1 + train_config.scheduler_cooldown // N
train_config.beta_warmup_epochs = 1 + train_config.beta_warmup_epochs // N
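# E.g. with 6 MIDI notes (N = 5): n_epochs 170 -> 35, lr_warmup_epochs 20 -> 5,
# beta_warmup_epochs 50 -> 11. (This branch is inactive with the current defaults, which stack spectrograms.)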
# Hparams that may be useless, depending on some other hparams
if train_config.pretrain_audio_only or model_config.preset_ae_method != 'aligned_vaes':
train_config.preset_alignment_criterion = None
# Automatic model.synth string update - to summarize this info into 1 Tensorboard string hparam
if model_config.synth == "dexed":
if model_config.dataset_synth_args[0] is not None: # Algorithms
model_config.synth_args_str = model_config.synth_args_str.replace(
"al*", "al" + '.'.join([str(alg) for alg in model_config.dataset_synth_args[0]]))
if model_config.dataset_synth_args[1] is not None: # Operators
model_config.synth_args_str = model_config.synth_args_str.replace(
"_op*", "_op" + ''.join([str(op) for op in model_config.dataset_synth_args[1]]))
if model_config.dataset_labels is not None: # Labels
model_config.synth_args_str = model_config.synth_args_str.replace(
"_lab*", '_' + '_'.join([label[0:4] for label in model_config.dataset_labels]))
else:
raise NotImplementedError("Unknown synth prefix for model.synth '{}'".format(model_config.synth))