berenslab · alex404 · Oct 30, 2024 · Oct 2, 2024 · Oct 2, 2024 · Oct 25, 2024
diff --git a/main.py b/main.py
@@ -3,22 +3,20 @@
 import os
 import sys
 import warnings
-from typing import Dict, List, cast
 
 import hydra
 import torch
 from hydra.utils import instantiate
 from omegaconf import DictConfig, OmegaConf
 
 from retinal_rl.framework_interface import TrainingFramework
-from retinal_rl.models.brain import Brain
 from retinal_rl.rl.sample_factory.sf_framework import SFFramework
 from runner.analyze import analyze
 from runner.dataset import get_datasets
 from runner.initialize import initialize
 from runner.sweep import launch_sweep
 from runner.train import train
-from runner.util import assemble_neural_circuits, delete_results
+from runner.util import create_brain, delete_results
 
 # Load the eval resolver for OmegaConf
 OmegaConf.register_new_resolver("eval", eval)
@@ -27,6 +25,9 @@
 # Hydra entry point
 @hydra.main(config_path="config/base", config_name="config", version_base=None)
 def _program(cfg: DictConfig):
+    # TODO: Instead of checking the config here, we should implement
+    # something like the ConfigStore, which ensures config parameters are present
+
     if cfg.command == "clean":
         delete_results(cfg)
         sys.exit(0)
@@ -37,23 +38,14 @@ def _program(cfg: DictConfig):
 
     device = torch.device(cfg.system.device)
 
-    sensors = OmegaConf.to_container(cfg.brain.sensors, resolve=True)
-    sensors = cast(Dict[str, List[int]], sensors)
-
-    connections = OmegaConf.to_container(cfg.brain.connections, resolve=True)
-    connections = cast(List[List[str]], connections)
-
-    connectome, circuits = assemble_neural_circuits(
-        cfg.brain.circuits, sensors, connections
-    )
-
-    brain = Brain(circuits, sensors, connectome).to(device)
+    brain = create_brain(cfg.brain).to(device)
 
-    if hasattr(cfg, "optimizer"):
-        optimizer = instantiate(cfg.optimizer.optimizer, brain.parameters())
+    optimizer = instantiate(cfg.optimizer.optimizer, brain.parameters())
+    if hasattr(cfg.optimizer, "objective"):
         objective = instantiate(cfg.optimizer.objective, brain=brain)
+        # TODO: RL framework currently can't use objective
     else:
-        warnings.warn("No optimizer config specified, is that wanted?")
+        warnings.warn("No objective specified, is that wanted?")
 
     if cfg.command == "scan":
         print(brain.scan())

diff --git a/resources/config_templates/user/brain/feedforward.yaml b/resources/config_templates/user/brain/feedforward.yaml
@@ -0,0 +1,23 @@
+name: feedforward
+sensors:
+  vision:
+    - 3
+    - ${dataset.vision_height}
+    - ${dataset.vision_width}
+connections:
+  - ["vision", "encoder"]
+  - ["encoder", "action_decoder"]
+circuits:
+  encoder:
+    _target_: retinal_rl.models.circuits.convolutional.ConvolutionalEncoder
+    num_layers: 3
+    num_channels: [4,8,16]
+    kernel_size: 6
+    stride: 2
+    activation: ${activation}
+  action_decoder:
+    _target_: retinal_rl.models.circuits.fully_connected.FullyConnected
+    output_shape: ${action_decoder_out}
+    hidden_units: ${latent_dimension}
+    activation: ${activation}
+
diff --git a/resources/config_templates/user/dataset/rl-apples.yaml b/resources/config_templates/user/dataset/rl-apples.yaml
@@ -0,0 +1,6 @@
+name: rl-apples
+
+env_name: gathering-apples
+vision_width: 160
+vision_height:  120
+input_satiety: true
diff --git a/resources/config_templates/user/experiment/gathering-apples.yaml b/resources/config_templates/user/experiment/gathering-apples.yaml
@@ -0,0 +1,17 @@
+# @package _global_
+defaults:
+  - _self_
+  - override /dataset: rl-apples
+  - override /brain: feedforward
+  - override /optimizer: rl-base
+
+framework: rl
+
+### Interpolation Parameters ###
+
+# This is a free list of parameters that can be interpolated by the subconfigs
+# in sweep, dataset, brain, and optimizer. A major use for this is interpolating
+# values in the subconfigs, and then looping over them in a sweep.
+activation: "elu"
+latent_dimension: [2048,1024]
+action_decoder_out: [512]
diff --git a/resources/config_templates/user/optimizer/rl-base.yaml b/resources/config_templates/user/optimizer/rl-base.yaml
@@ -0,0 +1,7 @@
+# The optimizer to use
+optimizer: # torch.optim Class and parameters
+  _target_: torch.optim.Adam
+  lr: 0.0003
+
+# The objective function
+# TODO: Implement objective support in the RL framework, then add the objective config here
diff --git a/retinal_rl/rl/analysis/statistics.py b/retinal_rl/rl/analysis/statistics.py
@@ -1,21 +1,22 @@
-from typing import Dict, List, Tuple
+import math
 import warnings
+from typing import Dict, List, Optional, Tuple
 
 import numpy as np
 import torch
-import torch.nn as nn
 from captum.attr import NeuronGradient
 from numpy.typing import NDArray
-from torch import Tensor
+from torch import Tensor, nn
 from torch.utils.data import Dataset
+from tqdm import tqdm
+from typing_extensions import deprecated
 
 from retinal_rl.models.brain import Brain
 from retinal_rl.models.circuits.convolutional import ConvolutionalEncoder
-from retinal_rl.models.util import encoder_out_size, rf_size_and_start
-from tqdm import tqdm
-import math
+from retinal_rl.util import encoder_out_size, rf_size_and_start
 
 
+@deprecated("Use functions of retinal_rl.analysis.statistics")
 def gradient_receptive_fields(
     device: torch.device, enc: ConvolutionalEncoder
 ) -> Dict[str, NDArray[np.float64]]:
@@ -53,10 +54,10 @@ def gradient_receptive_fields(
 
             # Assert min max is in bounds
             # potential TODO: change input size if rf is larger than actual input
-            h_min = max(0,h_min)
-            w_min = max(0,w_min)
-            hrf_size = min(hght,hrf_size)
-            wrf_size = min(wdth,wrf_size)
+            h_min = max(0, h_min)
+            w_min = max(0, w_min)
+            hrf_size = min(hght, hrf_size)
+            wrf_size = min(wdth, wrf_size)
 
             h_max = h_min + hrf_size
             w_max = w_min + wrf_size
@@ -75,13 +76,18 @@ def gradient_receptive_fields(
 
     return stas
 
-def _activation_triggered_average(model: nn.Sequential, n_batch: int = 2048, rf_size=None, device=None):
+
+def _activation_triggered_average(
+    model: nn.Sequential, n_batch: int = 2048, rf_size=None, device=None
+):
     model.eval()
     if rf_size is None:
         _out_channels, input_size = get_input_output_shape(model)
     else:
         input_size = rf_size
-    input_tensor = torch.randn((n_batch, *input_size), requires_grad=False, device=device)
+    input_tensor = torch.randn(
+        (n_batch, *input_size), requires_grad=False, device=device
+    )
     output = model(input_tensor)
     output = sum_collapse_output(output)
     input_tensor = input_tensor[:, None, :, :, :].expand(
@@ -94,41 +100,54 @@ def _activation_triggered_average(model: nn.Sequential, n_batch: int = 2048, rf_
     weighted = (weights * input_tensor).sum(0)
     return weighted.cpu().detach(), weight_sums.cpu().detach()
 
+
 def activation_triggered_average(
-    model: nn.Sequential, n_batch: int = 2048, n_iter: int = 1, rf_size=None, device=None
+    model: nn.Sequential,
+    n_batch: int = 2048,
+    n_iter: int = 1,
+    rf_size=None,
+    device=None,
 ) -> Dict[str, NDArray[np.float64]]:
     # TODO: WIP
     warnings.warn("Code is not tested and might contain bugs.")
     stas: Dict[str, NDArray[np.float64]] = {}
     with torch.no_grad():
-        for index, (layer_name, mdl) in tqdm(enumerate(model.named_children()), total=len(model)):
-            weighted, weight_sums = _activation_triggered_average(model[:index+1], n_batch, device=device)
-            for _ in tqdm(range(n_iter - 1), total=n_iter-1, leave=False):
-                it_weighted, it_weight_sums = _activation_triggered_average(model[:index+1], n_batch, rf_size, device=device)
+        for index, (layer_name, mdl) in tqdm(
+            enumerate(model.named_children()), total=len(model)
+        ):
+            weighted, weight_sums = _activation_triggered_average(
+                model[: index + 1], n_batch, device=device
+            )
+            for _ in tqdm(range(n_iter - 1), total=n_iter - 1, leave=False):
+                it_weighted, it_weight_sums = _activation_triggered_average(
+                    model[: index + 1], n_batch, rf_size, device=device
+                )
                 weighted += it_weighted
                 weight_sums += it_weight_sums
-            stas[layer_name] = (weighted.cpu().detach() / weight_sums[:, None, None, None] / len(weight_sums)).numpy()
+            stas[layer_name] = (
+                weighted.cpu().detach()
+                / weight_sums[:, None, None, None]
+                / len(weight_sums)
+            ).numpy()
         torch.cuda.empty_cache()
     return stas
 
+
+@deprecated("Use functions of retinal_rl.analysis.statistics")
 def sum_collapse_output(out_tensor):
     if len(out_tensor.shape) > 2:
-        sum_dims = [2+i for i in range(len(out_tensor.shape)-2)]
+        sum_dims = [2 + i for i in range(len(out_tensor.shape) - 2)]
         out_tensor = torch.sum(out_tensor, dim=sum_dims)
     return out_tensor
 
 
-def get_input_output_shape(model: nn.Sequential):
-    """
-    Calculates the 'minimal' input and output of a sequential model.
-    If last layer is a convolutional layer, output is assumed to be the number of channels (so 1x1 in space).
-    Takes into account if last layer is a pooling layer.
-    For linear layer obviously the number of out_features.
-    TODO: assert kernel sizes etc are quadratic / implement adaptation to non quadratic kernels
-    """
+def _find_last_layer_shape(
+    model: nn.Sequential,
+) -> Tuple[int, Optional[int], Optional[int], Optional[int], bool]:
     _first = 0
     down_stream_linear = False
     num_outputs = None
+    in_size, in_channels = None, None
     for i, layer in enumerate(reversed(model)):
         _first += 1
         if isinstance(layer, nn.Linear):
@@ -137,25 +156,48 @@ def get_input_output_shape(model: nn.Sequential):
             in_size = layer.in_features
             down_stream_linear = True
             break
-        elif isinstance(layer, nn.Conv2d):
+        if isinstance(layer, nn.Conv2d):
             num_outputs = layer.out_channels
             in_channels = layer.in_channels
-            in_size = layer.in_channels * ((layer.kernel_size[0]-1)*layer.dilation[0]+1) ** 2
+            in_size = (
+                layer.in_channels
+                * ((layer.kernel_size[0] - 1) * layer.dilation[0] + 1) ** 2
+            )
             break
-        elif isinstance(layer, nn.MaxPool2d) or isinstance(layer, nn.AvgPool2d):
-            for prev_layer in reversed(model[:-i-1]):
+        if isinstance(layer, (nn.MaxPool2d, nn.AvgPool2d)):
+            for prev_layer in reversed(model[: -i - 1]):
                 if isinstance(prev_layer, nn.Conv2d):
                     in_channels = prev_layer.out_channels
                     break
-                elif isinstance(prev_layer, nn.Linear):
-                    in_channels=1
+                if isinstance(prev_layer, nn.Linear):
+                    in_channels = 1
                 else:
-                    raise Exception("layer before pooling needs to be conv or linear")
-            _kernel_size = layer.kernel_size if isinstance(layer.kernel_size, int) else layer.kernel_size[0]
+                    raise TypeError("layer before pooling needs to be conv or linear")
+            _kernel_size = (
+                layer.kernel_size
+                if isinstance(layer.kernel_size, int)
+                else layer.kernel_size[0]
+            )
             in_size = _kernel_size**2 * in_channels
             break
+    return _first, num_outputs, in_size, in_channels, down_stream_linear
 
 
+@deprecated("Use functions of retinal_rl.analysis.statistics")
+def get_input_output_shape(model: nn.Sequential):
+    """
+    Calculates the 'minimal' input and output of a sequential model.
+    If last layer is a convolutional layer, output is assumed to be the number of channels (so 1x1 in space).
+    Takes into account if last layer is a pooling layer.
+    For linear layer obviously the number of out_features.
+    TODO: assert kernel sizes etc. are square / implement adaptation to non-square kernels
+    TODO: Check if still needed; this function is a near-duplicate of some of Sacha's code
+    """
+
+    _first, num_outputs, in_size, in_channels, down_stream_linear = (
+        _find_last_layer_shape(model)
+    )
+
     for i, layer in enumerate(reversed(model[:-_first])):
         if isinstance(layer, nn.Linear):
             if num_outputs is None:
@@ -171,11 +213,11 @@ def get_input_output_shape(model: nn.Sequential):
             in_size = (
                 (in_size - 1) * layer.stride[0]
                 - 2 * layer.padding[0] * down_stream_linear
-                + ((layer.kernel_size[0]-1)*layer.dilation[0]+1) 
+                + ((layer.kernel_size[0] - 1) * layer.dilation[0] + 1)
             )
             in_size = in_size**2 * in_channels
-        elif isinstance(layer, nn.MaxPool2d) or isinstance(layer, nn.AvgPool2d):
-            for prev_layer in reversed(model[:-i-_first-1]):
+        elif isinstance(layer, (nn.MaxPool2d, nn.AvgPool2d)):
+            for prev_layer in reversed(model[: -i - _first - 1]):
                 if isinstance(prev_layer, nn.Conv2d):
                     in_channels = prev_layer.out_channels
                     break
@@ -191,6 +233,8 @@ def get_input_output_shape(model: nn.Sequential):
     input_size = (in_channels, in_size, in_size)
     return num_outputs, input_size
 
+
+@deprecated("Use functions of retinal_rl.analysis.statistics")
 def get_reconstructions(
     device: torch.device,
     brain: Brain,