Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix rl config #50

Merged
merged 15 commits into from
Oct 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 9 additions & 17 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,20 @@
import os
import sys
import warnings
from typing import Dict, List, cast

import hydra
import torch
from hydra.utils import instantiate
from omegaconf import DictConfig, OmegaConf

from retinal_rl.framework_interface import TrainingFramework
from retinal_rl.models.brain import Brain
from retinal_rl.rl.sample_factory.sf_framework import SFFramework
from runner.analyze import analyze
from runner.dataset import get_datasets
from runner.initialize import initialize
from runner.sweep import launch_sweep
from runner.train import train
from runner.util import assemble_neural_circuits, delete_results
from runner.util import create_brain, delete_results

# Load the eval resolver for OmegaConf
OmegaConf.register_new_resolver("eval", eval)
Expand All @@ -27,6 +25,9 @@
# Hydra entry point
@hydra.main(config_path="config/base", config_name="config", version_base=None)
def _program(cfg: DictConfig):
# TODO: Instead of checking the config here, we should implement
# something like the ConfigStore, which ensures config parameters are present

if cfg.command == "clean":
delete_results(cfg)
sys.exit(0)
Expand All @@ -37,23 +38,14 @@ def _program(cfg: DictConfig):

device = torch.device(cfg.system.device)

sensors = OmegaConf.to_container(cfg.brain.sensors, resolve=True)
sensors = cast(Dict[str, List[int]], sensors)

connections = OmegaConf.to_container(cfg.brain.connections, resolve=True)
connections = cast(List[List[str]], connections)

connectome, circuits = assemble_neural_circuits(
cfg.brain.circuits, sensors, connections
)

brain = Brain(circuits, sensors, connectome).to(device)
brain = create_brain(cfg.brain).to(device)

if hasattr(cfg, "optimizer"):
optimizer = instantiate(cfg.optimizer.optimizer, brain.parameters())
optimizer = instantiate(cfg.optimizer.optimizer, brain.parameters())
if hasattr(cfg.optimizer, "objective"):
objective = instantiate(cfg.optimizer.objective, brain=brain)
# TODO: RL framework currently can't use objective
else:
warnings.warn("No optimizer config specified, is that wanted?")
warnings.warn("No objective specified, is that wanted?")

if cfg.command == "scan":
print(brain.scan())
Expand Down
23 changes: 23 additions & 0 deletions resources/config_templates/user/brain/feedforward.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
name: feedforward
sensors:
vision:
- 3
- ${dataset.vision_height}
- ${dataset.vision_width}
connections:
- ["vision", "encoder"]
- ["encoder", "action_decoder"]
circuits:
encoder:
_target_: retinal_rl.models.circuits.convolutional.ConvolutionalEncoder
num_layers: 3
num_channels: [4,8,16]
kernel_size: 6
stride: 2
activation: ${activation}
action_decoder:
_target_: retinal_rl.models.circuits.fully_connected.FullyConnected
output_shape: ${action_decoder_out}
hidden_units: ${latent_dimension}
activation: ${activation}

6 changes: 6 additions & 0 deletions resources/config_templates/user/dataset/rl-apples.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
name: rl-apples

env_name: gathering-apples
vision_width: 160
vision_height: 120
input_satiety: true
17 changes: 17 additions & 0 deletions resources/config_templates/user/experiment/gathering-apples.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# @package _global_
defaults:
- _self_
- override /dataset: rl-apples
- override /brain: feedforward
- override /optimizer: rl-base

framework: rl

### Interpolation Parameters ###

# This is a free list of parameters that can be interpolated by the subconfigs
# in sweep, dataset, brain, and optimizer. A major use for this is interpolating
# values in the subconfigs, and then looping over them in a sweep.
activation: "elu"
latent_dimension: [2048,1024]
action_decoder_out: [512]
7 changes: 7 additions & 0 deletions resources/config_templates/user/optimizer/rl-base.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# The optimizer to use
optimizer: # torch.optim Class and parameters
_target_: torch.optim.Adam
lr: 0.0003

# The objective function
# TODO: Implement in RL and update config
120 changes: 82 additions & 38 deletions retinal_rl/rl/analysis/statistics.py
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure if you use this at all, but you can probably delete this module.

Copy link
Contributor Author

@fabioseel fabioseel Oct 29, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, mostly. There were some things I wanted to look into / compare, but maybe I can already add @deprecated for now
(the rest of this will be addressed in #35)

Original file line number Diff line number Diff line change
@@ -1,21 +1,22 @@
from typing import Dict, List, Tuple
import math
import warnings
from typing import Dict, List, Optional, Tuple

import numpy as np
import torch
import torch.nn as nn
from captum.attr import NeuronGradient
from numpy.typing import NDArray
from torch import Tensor
from torch import Tensor, nn
from torch.utils.data import Dataset
from tqdm import tqdm
from typing_extensions import deprecated

from retinal_rl.models.brain import Brain
from retinal_rl.models.circuits.convolutional import ConvolutionalEncoder
from retinal_rl.models.util import encoder_out_size, rf_size_and_start
from tqdm import tqdm
import math
from retinal_rl.util import encoder_out_size, rf_size_and_start


@deprecated("Use functions of retinal_rl.analysis.statistics")
def gradient_receptive_fields(
device: torch.device, enc: ConvolutionalEncoder
) -> Dict[str, NDArray[np.float64]]:
Expand Down Expand Up @@ -53,10 +54,10 @@ def gradient_receptive_fields(

# Assert min max is in bounds
# potential TODO: change input size if rf is larger than actual input
h_min = max(0,h_min)
w_min = max(0,w_min)
hrf_size = min(hght,hrf_size)
wrf_size = min(wdth,wrf_size)
h_min = max(0, h_min)
w_min = max(0, w_min)
hrf_size = min(hght, hrf_size)
wrf_size = min(wdth, wrf_size)

h_max = h_min + hrf_size
w_max = w_min + wrf_size
Expand All @@ -75,13 +76,18 @@ def gradient_receptive_fields(

return stas

def _activation_triggered_average(model: nn.Sequential, n_batch: int = 2048, rf_size=None, device=None):

def _activation_triggered_average(
model: nn.Sequential, n_batch: int = 2048, rf_size=None, device=None
):
model.eval()
if rf_size is None:
_out_channels, input_size = get_input_output_shape(model)
else:
input_size = rf_size
input_tensor = torch.randn((n_batch, *input_size), requires_grad=False, device=device)
input_tensor = torch.randn(
(n_batch, *input_size), requires_grad=False, device=device
)
output = model(input_tensor)
output = sum_collapse_output(output)
input_tensor = input_tensor[:, None, :, :, :].expand(
Expand All @@ -94,41 +100,54 @@ def _activation_triggered_average(model: nn.Sequential, n_batch: int = 2048, rf_
weighted = (weights * input_tensor).sum(0)
return weighted.cpu().detach(), weight_sums.cpu().detach()


def activation_triggered_average(
model: nn.Sequential, n_batch: int = 2048, n_iter: int = 1, rf_size=None, device=None
model: nn.Sequential,
n_batch: int = 2048,
n_iter: int = 1,
rf_size=None,
device=None,
) -> Dict[str, NDArray[np.float64]]:
# NOTE(review): this span is a rendered diff hunk without +/- markers; the
# pre-change and post-change renderings of the signature and of several
# statements appear back to back (same logic, only reformatted).
#
# For every child layer of `model`, runs `_activation_triggered_average` on the
# sub-model `model[: index + 1]`, accumulates the results over `n_iter` runs
# and stores the normalised array in `stas` under the layer's name.
# Always emits a warning that the code is untested.
# TODO: WIP
warnings.warn("Code is not tested and might contain bugs.")
stas: Dict[str, NDArray[np.float64]] = {}
with torch.no_grad():
# NOTE(review): the first helper call below does not forward `rf_size`, while
# the repeat calls inside the `n_iter - 1` loop do — confirm this is intended.
for index, (layer_name, mdl) in tqdm(enumerate(model.named_children()), total=len(model)):
weighted, weight_sums = _activation_triggered_average(model[:index+1], n_batch, device=device)
for _ in tqdm(range(n_iter - 1), total=n_iter-1, leave=False):
it_weighted, it_weight_sums = _activation_triggered_average(model[:index+1], n_batch, rf_size, device=device)
for index, (layer_name, mdl) in tqdm(
enumerate(model.named_children()), total=len(model)
):
weighted, weight_sums = _activation_triggered_average(
model[: index + 1], n_batch, device=device
)
for _ in tqdm(range(n_iter - 1), total=n_iter - 1, leave=False):
it_weighted, it_weight_sums = _activation_triggered_average(
model[: index + 1], n_batch, rf_size, device=device
)
# Accumulate the results of the additional runs in place.
weighted += it_weighted
weight_sums += it_weight_sums
# Divide the accumulated weighted inputs by the weight sums (expanded with three
# trailing axes for broadcasting) and by `len(weight_sums)`, then convert to NumPy.
stas[layer_name] = (weighted.cpu().detach() / weight_sums[:, None, None, None] / len(weight_sums)).numpy()
stas[layer_name] = (
weighted.cpu().detach()
/ weight_sums[:, None, None, None]
/ len(weight_sums)
).numpy()
# Ask PyTorch to release unused cached CUDA memory.
torch.cuda.empty_cache()
return stas


@deprecated("Use functions of retinal_rl.analysis.statistics")
def sum_collapse_output(out_tensor):
# Reduces `out_tensor` to at most two dimensions: when it has more than two,
# every dimension from index 2 onward is summed away; otherwise the tensor is
# returned unchanged.
# NOTE(review): the two `sum_dims` lines are the pre- and post-change
# renderings of the same statement in this diff hunk.
if len(out_tensor.shape) > 2:
sum_dims = [2+i for i in range(len(out_tensor.shape)-2)]
sum_dims = [2 + i for i in range(len(out_tensor.shape) - 2)]
out_tensor = torch.sum(out_tensor, dim=sum_dims)
return out_tensor


def get_input_output_shape(model: nn.Sequential):
"""
Calculates the 'minimal' input and output of a sequential model.
If last layer is a convolutional layer, output is assumed to be the number of channels (so 1x1 in space).
Takes into account if last layer is a pooling layer.
For linear layer obviously the number of out_features.
TODO: assert kernel sizes etc are quadratic / implement adaptation to non quadratic kernels
"""
def _find_last_layer_shape(
model: nn.Sequential,
) -> Tuple[int, Optional[int], Optional[int], Optional[int], bool]:
_first = 0
down_stream_linear = False
num_outputs = None
in_size, in_channels = None, None
for i, layer in enumerate(reversed(model)):
_first += 1
if isinstance(layer, nn.Linear):
Expand All @@ -137,25 +156,48 @@ def get_input_output_shape(model: nn.Sequential):
in_size = layer.in_features
down_stream_linear = True
break
elif isinstance(layer, nn.Conv2d):
if isinstance(layer, nn.Conv2d):
num_outputs = layer.out_channels
in_channels = layer.in_channels
in_size = layer.in_channels * ((layer.kernel_size[0]-1)*layer.dilation[0]+1) ** 2
in_size = (
layer.in_channels
* ((layer.kernel_size[0] - 1) * layer.dilation[0] + 1) ** 2
)
break
elif isinstance(layer, nn.MaxPool2d) or isinstance(layer, nn.AvgPool2d):
for prev_layer in reversed(model[:-i-1]):
if isinstance(layer, (nn.MaxPool2d, nn.AvgPool2d)):
for prev_layer in reversed(model[: -i - 1]):
if isinstance(prev_layer, nn.Conv2d):
in_channels = prev_layer.out_channels
break
elif isinstance(prev_layer, nn.Linear):
in_channels=1
if isinstance(prev_layer, nn.Linear):
in_channels = 1
else:
raise Exception("layer before pooling needs to be conv or linear")
_kernel_size = layer.kernel_size if isinstance(layer.kernel_size, int) else layer.kernel_size[0]
raise TypeError("layer before pooling needs to be conv or linear")
_kernel_size = (
layer.kernel_size
if isinstance(layer.kernel_size, int)
else layer.kernel_size[0]
)
in_size = _kernel_size**2 * in_channels
break
return _first, num_outputs, in_size, in_channels, down_stream_linear


@deprecated("Use functions of retinal_rl.analysis.statistics")
def get_input_output_shape(model: nn.Sequential):
"""
Calculates the 'minimal' input and output of a sequential model.
If last layer is a convolutional layer, output is assumed to be the number of channels (so 1x1 in space).
Takes into account if last layer is a pooling layer.
For linear layer obviously the number of out_features.
TODO: assert kernel sizes etc are quadratic / implement adaptation to non quadratic kernels
TODO: Check if still needed; this function is a near duplicate of some of Sacha's code
"""

_first, num_outputs, in_size, in_channels, down_stream_linear = (
_find_last_layer_shape(model)
)

for i, layer in enumerate(reversed(model[:-_first])):
if isinstance(layer, nn.Linear):
if num_outputs is None:
Expand All @@ -171,11 +213,11 @@ def get_input_output_shape(model: nn.Sequential):
in_size = (
(in_size - 1) * layer.stride[0]
- 2 * layer.padding[0] * down_stream_linear
+ ((layer.kernel_size[0]-1)*layer.dilation[0]+1)
+ ((layer.kernel_size[0] - 1) * layer.dilation[0] + 1)
)
in_size = in_size**2 * in_channels
elif isinstance(layer, nn.MaxPool2d) or isinstance(layer, nn.AvgPool2d):
for prev_layer in reversed(model[:-i-_first-1]):
elif isinstance(layer, (nn.MaxPool2d, nn.AvgPool2d)):
for prev_layer in reversed(model[: -i - _first - 1]):
if isinstance(prev_layer, nn.Conv2d):
in_channels = prev_layer.out_channels
break
Expand All @@ -191,6 +233,8 @@ def get_input_output_shape(model: nn.Sequential):
input_size = (in_channels, in_size, in_size)
return num_outputs, input_size


@deprecated("Use functions of retinal_rl.analysis.statistics")
def get_reconstructions(
device: torch.device,
brain: Brain,
Expand Down
Loading