# model_config.yaml


# Model hyperparameters.
model_config:
  # Maximum number of lags; per the original author's note, overflow occurs
  # above this value.
  lags: 256
  embedding_dim: 768
  n_blocks: 15
  pool_size: 16
  number_of_heads: 8
  number_ts: 264
  # number_of_clusters: None

  # --------------------------------------------------------------------------
  # Unless you really want to experiment, do not change anything below.
  # --------------------------------------------------------------------------

  # Upsampling details
  # conv_activation: F.gelu
  # conv_FFN_activation: F.gelu
  conv_dropout_FFN: 0.5
  conv_dropout_linear: 0.5
  conv_FFN_bias: true
  conv_FFN_expansion_size: 2
  conv_bias: true

  # Attention block details
  attention_head_dropout: 0.5
  attention_projection_dropout: 0.5
  attention_FFN_dropout: 0.5
  # attention_FFN_activation: F.gelu
  attention_FFN_bias: true
  attention_FFN_expansion_size: 4
  
# Project / run-group identifiers.
# NOTE(review): presumably consumed by an experiment tracker (project + group
# naming) — confirm against the training script.
# The top-level key and its first child intentionally share the name
# `project_name`; consumers must index as project_name.project_name.
project_name:
  project_name: "One_For_All_Cluster_Training"
  group_name: "EMA_positional_embedding_2"

  
# Trainer settings: snapshot cadence/location, epoch budget, compile toggle.
trainer_config:
  # NOTE(review): unit of save_every is presumably epochs — confirm.
  save_every: 1
  max_epochs: 2
  # NOTE(review): the name says "12_heads" while model_config.number_of_heads
  # is 8 — confirm which one is intended.
  snapshot_name: "small_model_12_heads"
  snapshot_dir: "model"
  compile_model: false
  

# Optimizer hyperparameters.
# NOTE(review): presumably forwarded as keyword arguments to the optimizer
# constructor — confirm against the training script.
optimizer_config:
  lr: 0.0001
  weight_decay: 0.1
  # momentum: 0.9  (currently disabled)

# Learning-rate scheduler settings.
# NOTE(review): T_0 / eta_min match the parameter names of PyTorch's
# CosineAnnealingWarmRestarts — confirm which scheduler actually reads these.
scheduler_config:
  T_0: 5
  eta_min: 0.0000088


# Dataset locations and loader settings for the training and validation splits.
data:
  # Training split files.
  train_path:
    file: "data/array_train.dat"
    length_file: "data/lengthsarray_train.dat"
    file_names: "data/names_array_train.txt"
    # 128 -> 1 prediction (original author's note).
    # NOTE(review): model_config.lags is 256, described there as a maximum.
    lags: 128

  # Validation split files.
  val_path:
    file: "data/array_test.dat"
    length_file: "data/lengthsarray_test.dat"
    file_names: "data/names_array_test.txt"
    # 128 -> 1 prediction (original author's note).
    lags: 128

  # Loader options for the training split.
  # NOTE(review): key names mirror torch.utils.data.DataLoader keyword
  # arguments; presumably forwarded as-is — confirm.
  train_data_details:
    batch_size: 64
    num_workers: 12
    # shuffle: True
    pin_memory: true
    persistent_workers: true
    prefetch_factor: 2

  # Loader options for the validation split.
  val_data_details:
    batch_size: 128
    num_workers: 4
    # shuffle: False
    pin_memory: true
    drop_last: true