# experiments/v01_drums_vel_perf/model.yaml

# Seed value for this experiment; presumably used to seed the random number
# generators of the consuming code — confirm against the consumer.
random_seed: 42

# Settings for the input-side encoding. `class` names a Python object through
# the PyYAML `!!python/name:` tag, so this file cannot be read with a safe
# loader; the remaining keys are presumably passed to that class — confirm.
input_encoding:
  class: !!python/name:museflow.encodings.PianoRollEncoding
  normalize: true
  # 4 samples per beat (tempo is forced to 60 BPM)
  sampling_frequency: 4
# Settings for the output-side encoding. `class` names a Python object through
# the PyYAML `!!python/name:` tag; the remaining keys are presumably passed to
# that class — confirm.
output_encoding:
  class: !!python/name:museflow.encodings.PerformanceEncoding
  use_velocity: true
  velocity_unit: 16
  # 1/12 of a beat (tempo is forced to 60 BPM)
  time_unit: 0.08333333333333333
  use_all_off_event: true
  use_drum_events: true
  use_magenta: true

# Velocity statistics computed on the training set. They are ignored by
# default and only take effect when run() is called with
# normalize_velocity=True.
normalize_velocity:
  mean: 64.25399574283072
  variance: 432.9165195560093

# Model definition: layer stacks, recurrent cells, attention, decoder and
# training hyper-parameters. Every `class` / `activation` value is a Python
# object referenced through the PyYAML `!!python/name:` tag.
model:
  # Two Conv2D + MaxPooling2D stages (32 filters each, ELU activation).
  encoder_cnn:
    2d_layers:
      - class: !!python/name:tensorflow.layers.Conv2D
        filters: 32
        kernel_size: [12, 12]
        padding: same
        activation: !!python/name:tensorflow.nn.elu
      - class: !!python/name:tensorflow.layers.MaxPooling2D
        pool_size: [2, 2]
        strides: [2, 2]
      - class: !!python/name:tensorflow.layers.Conv2D
        filters: 32
        kernel_size: [4, 4]
        padding: same
        activation: !!python/name:tensorflow.nn.elu
      - class: !!python/name:tensorflow.layers.MaxPooling2D
        pool_size: [2, 4]
        strides: [2, 4]
  encoder_rnn:
    forward_cell:
      num_units: 200
  # Three Conv1D + MaxPooling1D stages (300 filters each, ELU activation);
  # each pooling layer uses pool size 2 and stride 2.
  style_encoder_cnn:
    1d_layers:
      - class: !!python/name:tensorflow.layers.Conv1D
        filters: 300
        kernel_size: 6
        padding: same
        activation: !!python/name:tensorflow.nn.elu
      - class: !!python/name:tensorflow.layers.MaxPooling1D
        pool_size: 2
        strides: 2
      - class: !!python/name:tensorflow.layers.Conv1D
        filters: 300
        kernel_size: 4
        padding: same
        activation: !!python/name:tensorflow.nn.elu
      - class: !!python/name:tensorflow.layers.MaxPooling1D
        pool_size: 2
        strides: 2
      - class: !!python/name:tensorflow.layers.Conv1D
        filters: 300
        kernel_size: 4
        padding: same
        activation: !!python/name:tensorflow.nn.elu
      - class: !!python/name:tensorflow.layers.MaxPooling1D
        pool_size: 2
        strides: 2
  style_encoder_rnn:
    forward_cell:
      num_units: 500
  attention_mechanism:
    class: !!python/name:tensorflow.contrib.seq2seq.BahdanauAttention
    num_units: 300
  embedding_layer:
    output_size: 300
  decoder:
    cell:
      num_units: 1024
    # for inference only
    max_length: 2000

  training:
    # Arguments for tensorflow.train.exponential_decay: starting rate 0.001,
    # decay_rate 0.5 applied per 3000 decay_steps.
    lr_decay:
      class: !!python/name:tensorflow.train.exponential_decay
      learning_rate: 0.001
      decay_steps: 3000
      decay_rate: 0.5
    # NOTE(review): presumably the gradient-norm clipping threshold — confirm
    # against the training code.
    max_gradient_norm: 0.001

# Trainer schedule. NOTE(review): both periods are presumably counted in
# training steps — confirm against the trainer implementation.
trainer:
  logging_period: 50
  validation_period: 800

# Training-set files: a source database (file named all_except_drums.db), a
# target database (all.db) and a gzipped JSON metadata file.
# NOTE(review): the paths are relative; the directory they are resolved
# against is not visible in this file — confirm.
train_data:
  source_db_path: ../data/synth/train/final/shuf/all_except_drums.db
  target_db_path: ../data/synth/train/final/shuf/all.db
  metadata_path: ../data/synth/train/final/shuf/meta.json.gz
# Validation-set files: a source database (file named all_except_drums.db), a
# target database (all.db) and a gzipped JSON metadata file.
# NOTE(review): the paths are relative; the directory they are resolved
# against is not visible in this file — confirm.
val_data:
  source_db_path: ../data/synth/val/final/all_except_drums.db
  target_db_path: ../data/synth/val/final/all.db
  metadata_path: ../data/synth/val/final/meta.json.gz

# Named note filters, one per entry. Each `instrument_re` is an anchored
# pattern (^...$); presumably it is matched as a regular expression against
# the instrument name of a note — confirm against the consumer.
style_note_filters:
  Bass:
    instrument_re: '^BB Bass$'
  Piano:
    instrument_re: '^BB Piano$'
  Guitar:
    instrument_re: '^BB Guitar$'
  Strings:
    instrument_re: '^BB Strings$'
  Drums:
    instrument_re: '^BB Drums$'

# Data-preparation settings: epoch count, training-example count, batch sizes
# for training and validation, and the shuffle buffer size.
# NOTE(review): num_train_examples is annotated as "1/2 epoch" while
# num_epochs is 1 — confirm how the two values interact in the consumer.
data_prep:
  num_epochs: 1
  num_train_examples: 1559098  # 1/2 epoch
  train_batch_size: 64
  val_batch_size: 128
  shuffle_buffer_size: 2000
# NOTE(review): this is a top-level key, not nested under data_prep.
# Presumably an upper bound on the target sequence length — confirm the unit
# and where it is applied.
max_target_length: 300