diff --git a/README.md b/README.md index 912baea..4925cd5 100644 --- a/README.md +++ b/README.md @@ -40,6 +40,22 @@ Ruck aims to fill the following criteria: 2. Be easily **extensible** and **debuggable**. 3. Performant while maintaining its simplicity. +## Features + +Ruck has various features that will be expanded upon in time +- ๐Ÿ“ฆ Modular evolutionary systems inspired by pytorch lightning + + Helps organise code & arguably looks clean + +- โž• Multi-Objective optimisation support + + Optionally optimised version of NSGA-II if `numba` is installed, over 65x faster + +- ๐ŸŽ Optional multithreading support with `ray`, including helper functions + +- ๐Ÿญ Factory methods for simple evolutionary algorithms + +- ๐Ÿงช Various helper functions for selection, mutation and mating + + ## Citing Ruck Please use the following citation if you use Ruck in your research: diff --git a/examples/multiobjective.py b/examples/multiobjective.py index c6cf6fd..2d5f6a0 100644 --- a/examples/multiobjective.py +++ b/examples/multiobjective.py @@ -25,7 +25,7 @@ from matplotlib import pyplot as plt from ruck import * -from ruck.external.deap import select_nsga2 +from ruck.functional import select_nsga2 class MultiObjectiveMinimalModule(EaModule): diff --git a/requirements-all.txt b/requirements-all.txt index 513b5a6..6a9b3c5 100644 --- a/requirements-all.txt +++ b/requirements-all.txt @@ -2,6 +2,9 @@ pip>=21.0 numpy>=1.19 tqdm>=4 +# optional for performance improvements +numba>=0.40.0 + # requirements needed for examples too ray>=1.6.0 deap>=1.3 diff --git a/ruck/external/_numba.py b/ruck/external/_numba.py new file mode 100644 index 0000000..5c53943 --- /dev/null +++ b/ruck/external/_numba.py @@ -0,0 +1,57 @@ +# ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ +# MIT License +# +# Copyright (c) 2021 Nathan Juraj Michlo +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ + + +# ========================================================================= # +# Timer # +# ========================================================================= # + + +def optional_njit(*args, cache=True, **kwargs): + """ + Optionally apply the numba JIT to a function if numba is installed. + - Additionally by default sets the cache=True value on + the JIT functions so that startup times are faster! + """ + + def _decorator(fn): + # try import numba + try: + import numba + except ImportError: + import warnings + warnings.warn(f'Performance of {fn.__name__} will be slow. Skipping JIT compilation because numba is not installed!') + numba = None + # handle cases + if numba is not None: + fn = numba.njit(*args, cache=cache, **kwargs)(fn) + # done! + return fn + # return decorator + return _decorator + + +# ========================================================================= # +# lists # +# ========================================================================= # diff --git a/ruck/external/deap.py b/ruck/external/deap.py index 7501401..4add335 100644 --- a/ruck/external/deap.py +++ b/ruck/external/deap.py @@ -22,29 +22,44 @@ # SOFTWARE. # ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ +import warnings from typing import Optional -from typing import Tuple +from typing import Sequence + from ruck.functional import check_selection try: import deap except ImportError as e: - import warnings warnings.warn('failed to import deap, please install it: $ pip install deap') raise e + # ========================================================================= # # deap helper # # ========================================================================= # +# CONFIG: | JIT: | PYTHON: +# (in=0008 out=0004) | [OLD: 0.000176 NEW: 0.000156s SPEEDUP: 1.126655x] | [OLD: 0.000139 NEW: 0.000198s SPEEDUP: 0.699888x] +# (in=0064 out=0032) | [OLD: 0.002818 NEW: 0.000316s SPEEDUP: 8.913371x] | [OLD: 0.002732 NEW: 0.003151s SPEEDUP: 0.867194x] +# (in=0256 out=0128) | [OLD: 0.040459 NEW: 0.001258s SPEEDUP: 32.161621x] | [OLD: 0.038630 NEW: 0.045156s SPEEDUP: 0.855490x] +# (in=1024 out=0512) | [OLD: 0.672029 NEW: 0.010862s SPEEDUP: 61.872225x] | [OLD: 0.644428 NEW: 0.768074s SPEEDUP: 0.839018x] +# (in=4096 out=2048) | [OLD: 10.511867 NEW: 0.158704s SPEEDUP: 66.235660x] | [OLD: 10.326754 NEW: 12.973584s SPEEDUP: 0.795983x] + + @check_selection -def select_nsga2(population, num_offspring: int, weights: Optional[Tuple[float, ...]] = None): +def select_nsga2(population, num_offspring: int, weights: Optional[Sequence[float]] = None): """ This is hacky... ruck doesn't yet have NSGA2 support, but we will add it in future! """ + # this function has been deprecated + warnings.warn('`ruck.external.deap.select_nsga2` has been deprecated in favour of `ruck.functional.select_nsga2`. `ruck.external.deap` will be removed in version v0.3.0') + # checks + if num_offspring == 0: + return [] # get a fitness value to perform checks f = population[0].fitness # check fitness diff --git a/ruck/functional/__init__.py b/ruck/functional/__init__.py index f18cc3d..66cffc9 100644 --- a/ruck/functional/__init__.py +++ b/ruck/functional/__init__.py @@ -22,9 +22,23 @@ # SOFTWARE. # ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ -from ruck.functional._mate import * -from ruck.functional._mutate import * -from ruck.functional._select import * +from ruck.functional._mate import check_mating +from ruck.functional._mate import mate_crossover_1d +from ruck.functional._mate import mate_crossover_nd + +from ruck.functional._mutate import check_mutation +from ruck.functional._mutate import mutate_flip_bits +from ruck.functional._mutate import mutate_flip_bit_groups + +from ruck.functional._select import check_selection +from ruck.functional._select import select_best +from ruck.functional._select import select_worst +from ruck.functional._select import select_random +from ruck.functional._select import select_tournament + +from ruck.functional._select_nsga import select_nsga2 +from ruck.functional._select_nsga import argsort_non_dominated +from ruck.functional._select_nsga import compute_crowding_distances # helper -- should be replaced from ruck.functional._algorithm import * diff --git a/ruck/functional/_algorithm.py b/ruck/functional/_algorithm.py index 96b737f..c70b925 100644 --- a/ruck/functional/_algorithm.py +++ b/ruck/functional/_algorithm.py @@ -31,7 +31,7 @@ from ruck._member import Member from ruck._member import Population -from ruck.functional import SelectFnHint +from ruck.functional._select import SelectFnHint from ruck.functional._mate import MateFnHint from ruck.functional._mutate import MutateFnHint from ruck.util._iter import chained diff --git a/ruck/functional/_select_nsga.py b/ruck/functional/_select_nsga.py new file mode 100644 index 0000000..d3fa524 --- /dev/null +++ b/ruck/functional/_select_nsga.py @@ -0,0 +1,328 @@ +# ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ +# MIT License +# +# Copyright (c) 2021 Nathan Juraj Michlo +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ + +""" +This version of NSGA-II is inspired by that from DEAP. +- The core of the algorithm is heavily modified for numba + support, with benchmarks over 65x faster for large arrays. +""" + + +from typing import List +from typing import Optional +from typing import Sequence + +import numpy as np + +from ruck._member import Population +from ruck.external._numba import optional_njit +from ruck.functional._select import check_selection +from ruck.util._array import arggroup +from ruck.util._population import population_fitnesses + + +# ========================================================================= # +# Helper # +# ========================================================================= # + + +@check_selection +def select_nsga2(population: Population, num: int, weights: Optional[Sequence[float]] = None): + """ + NSGA-II works by: + 1. grouping the population fitness values into successive fronts with non-dominated sorting. + - only the first F fronts that contain at least N elements are returned, such that if + there were F-1 fronts we would have < N elements. + 2. filling in missing elements using the crowding distance. + - The first F-1 fronts are used by default containing K elements. The + remaining N - K elements are selected using the crowding distances between + elements of the of the Fth front. + + ALGORITHM: according to the original NSGA-II paper + "between two solutions with differing non-domination ranks we prefer the point + with the lower rank. Otherwise, if both the points belong to the same front + then we prefer the point which is located in a region with lesser number of + points (the size of the cuboid inclosing it is larger)." + + CITATION: + Deb, Kalyanmoy, et al. "A fast elitist non-dominated sorting genetic algorithm for + multi-objective optimization: NSGA-II." International conference on parallel problem + solving from nature. Springer, Berlin, Heidelberg, 2000. + + NOTE: + This version of NSGA-II is inspired by that from DEAP. + - The core of the algorithm is heavily modified for numba + support, with benchmarks over 65x faster for large arrays. + """ + # 1. apply non-dominated sorting to get the sequential non-dominated fronts + fitnesses = population_fitnesses(population, weights=weights) + fronts = argsort_non_dominated(fitnesses, at_least_n=num) + # check if we need more elements + chosen = [i for front in fronts[:-1] for i in front] + missing = num - len(chosen) + assert missing >= 0 + # 2. Add missing elements prioritising those with the largest crowding distances. + # Using weighted vs. original fitness values does not affect the crowding distance! + # NOTE: should this not operate over all elements? this feels weird just over the last front? + if missing > 0: + assert len(fronts[-1]) >= missing + front_fitnesses = [population[i].fitness for i in fronts[-1]] # TODO: `front_fitnesses = fitnesses[fronts[-1]]` should work, something is wrong with the `get_crowding_distances` function! + # compute distances + dists = compute_crowding_distances(front_fitnesses) + idxs = np.argsort(-np.array(dists)) + chosen.extend(fronts[-1][i] for i in idxs[:num - len(chosen)]) + # return the original members + assert len(chosen) == num + return [population[i] for i in chosen] + + +# ========================================================================= # +# Non-Dominated Sorting # +# ========================================================================= # + + +def argsort_non_dominated(fitnesses: np.array, at_least_n: int = None, first_front_only=False) -> List[List[int]]: + """ + Perform non-dominated arg-sorting on the elements in the array + - The indices are contained in "fonts". Each front is a list of points + that are not dominated by points in following fronts. + - returns at least `at_least_n` indices, or all + the indices if not specified. + + The algorithm works by repeatedly finding non-dominated fronts, and + then removing them from the sets of points being considered. + - An implementation detail is that we first check for duplicate + points, which are then re-added into the fronts after they are found. + + NOTE: A more efficient version of the algorithm + exists, but this is good enough for now. + """ + if at_least_n is None: + at_least_n = len(fitnesses) + # checks + assert at_least_n >= 0 + assert at_least_n <= len(fitnesses) + # exit early + if at_least_n == 0: + return [] + # collect all the non-unique elements into groups + groups, unique, counts = arggroup(fitnesses, keep_order=True, return_unique=True, return_counts=True, axis=0) + # sort the unique elements into fronts + u_fronts_idxs = _argsort_non_dominated_unique( + unique_values=unique, + unique_counts=counts, + at_least_n=at_least_n, + pareto_front_only=first_front_only, + ) + # expand the groups back into fronts + # that can contain repeated values + return [[g for i in front for g in groups[i]] for front in u_fronts_idxs] + + +@optional_njit() +def _argsort_non_dominated_unique( + unique_values: np.ndarray, + unique_counts: np.ndarray, + at_least_n: int, + pareto_front_only: bool = False, +) -> list: + assert unique_values.ndim == 2 + assert unique_counts.ndim == 1 + assert len(unique_values) == len(unique_counts) + + # get the number of items still to be + # selected so we can exit early + N = min(int(np.sum(unique_counts)), at_least_n) + U = len(unique_values) + # exit early if we need to + num_selected = 0 + # store the indices of the values that form the first front, + # containing all the items that are not dominated by anything else! + first_front = [] + # store the counts for each value based on + # the number of elements that dominate it + dominated_count = np.zeros(U, dtype='int') + + # construct a list of lists to store all the indices of the + # items that the parent item dominates + dominates_lists = [] + for _ in range(U): + # numba cannot infer the types if we dont do this + dominates_lists.append([-1]) + dominates_lists[-1].clear() + # get the first pareto optimal front as all the fitness values + # that are not dominated by any of the other fitness values + for i, i_fit in enumerate(unique_values): + # check if a fitness value is dominated by any of the other fitness values + # and update its statistics if it is + for j, j_fit in zip(range(i+1, U), unique_values[i + 1:]): + if _dominates(i_fit, j_fit): + dominated_count[j] += 1 + dominates_lists[i].append(j) + elif _dominates(j_fit, i_fit): + dominated_count[i] += 1 + dominates_lists[j].append(i) + # add to the front the fitness value + # if it is not dominated by anything else + if dominated_count[i] == 0: + num_selected += unique_counts[i] + first_front.append(i) + + # exit early + if pareto_front_only or (num_selected >= N): + return [first_front] + + # create the remaining fronts + prev_front = first_front + fronts = [first_front] + + # repeatedly generate the next fronts + while True: + # add a new front + curr_front = [] + fronts.append(curr_front) + # for each item in a previous front + added = False + for i in prev_front: + # check all the items that the previous value dominates + for j in dominates_lists[i]: + dominated_count[j] -= 1 + # add the item to the current front if it is only dominated + # by elements in the fronts before it. + if dominated_count[j] == 0: + num_selected += unique_counts[j] + curr_front.append(j) + added = True + # make sure that there is not an infinite + # loop if there is accidentally a bug! + if not added: + raise RuntimeError('This is a bug!') + # exit early + if num_selected >= N: + return fronts + # update the previous front + prev_front = curr_front + + +@optional_njit() +def _dominates(w_fitness, w_fitness_other): + """ + Return true if ALL values are [greater than or equal] AND + at least one value is strictly [greater than]. + - If any value is [less than] then it is non-dominating + - both arrays must be one dimensional and the same size, we don't check for this! + """ + dominated = False + for iv, jv in zip(w_fitness, w_fitness_other): + if iv > jv: + dominated = True + elif iv < jv: + return False + return dominated + + +# ========================================================================= # +# CROWDING DISTANCES # +# ========================================================================= # + + +# this is usually faster without JIT for very large arrays, +# but for small inputs the JIT is often better < 64 +# +# SHAPE: (N, F) | JIT: | NO-JIT: # +# โ” shape=(0, 2) | OLD: 0.000001s NEW: 0.000002s SPEEDUP: 0.368500 | OLD: 0.000000s NEW: 0.000002s SPEEDUP: 0.202025 โ”“ # +# โ”— shape=(0, 16) | OLD: 0.000000s NEW: 0.000001s SPEEDUP: 0.266382 | OLD: 0.000000s NEW: 0.000001s SPEEDUP: 0.193062 โ”› # +# โ” shape=(1, 2) | OLD: 0.000004s NEW: 0.000001s SPEEDUP: 3.007124 | OLD: 0.000004s NEW: 0.000007s SPEEDUP: 0.578199 โ”“ # +# โ”— shape=(1, 16) | OLD: 0.000015s NEW: 0.000003s SPEEDUP: 5.763490 | OLD: 0.000015s NEW: 0.000036s SPEEDUP: 0.417297 โ”› # +# โ” shape=(8, 2) | OLD: 0.000015s NEW: 0.000002s SPEEDUP: 6.663598 | OLD: 0.000015s NEW: 0.000017s SPEEDUP: 0.885061 โ”“ # +# โ”— shape=(8, 16) | OLD: 0.000092s NEW: 0.000009s SPEEDUP: 10.708658 | OLD: 0.000091s NEW: 0.000103s SPEEDUP: 0.883329 โ”› # +# โ” shape=(64, 2) | OLD: 0.000099s NEW: 0.000004s SPEEDUP: 23.352296 | OLD: 0.000094s NEW: 0.000018s SPEEDUP: 5.085974 โ”“ # +# โ”— shape=(64, 16) | OLD: 0.000702s NEW: 0.000029s SPEEDUP: 24.185735 | OLD: 0.000670s NEW: 0.000121s SPEEDUP: 5.514704 โ”› # +# โ” shape=(256, 2) | OLD: 0.000394s NEW: 0.000013s SPEEDUP: 29.474941 | OLD: 0.000385s NEW: 0.000029s SPEEDUP: 13.172985 โ”“ # +# โ”— shape=(256, 16) | OLD: 0.002923s NEW: 0.000177s SPEEDUP: 16.466151 | OLD: 0.002780s NEW: 0.000252s SPEEDUP: 11.045761 โ”› # +# โ” shape=(1024, 2) | OLD: 0.001671s NEW: 0.000103s SPEEDUP: 16.259937 | OLD: 0.001584s NEW: 0.000107s SPEEDUP: 14.738836 โ”“ # +# โ”— shape=(1024, 16) | OLD: 0.012324s NEW: 0.000879s SPEEDUP: 14.016850 | OLD: 0.011636s NEW: 0.000838s SPEEDUP: 13.879243 โ”› # +# โ” shape=(16384, 2) | OLD: 0.029240s NEW: 0.002433s SPEEDUP: 12.016264 | OLD: 0.028420s NEW: 0.002033s SPEEDUP: 13.977902 โ”“ # +# โ”— shape=(16384, 16) | OLD: 0.214716s NEW: 0.020512s SPEEDUP: 10.467601 | OLD: 0.212408s NEW: 0.016327s SPEEDUP: 13.009210 โ”› # + +def compute_crowding_distances(positions) -> np.ndarray: + """ + Compute the crowding distance for each position in an array. + - These are usually a 2D array of fitness values + + The general algorithm for the crowding distance is: + 1. for each component of the fitness/position, add to the distance for each element: + 2. | sort all the entries according to this component + 3. | the endpoints of the sorted array are assigned infinite distance + 4. | add to the distance for the middle element looking at consecutive triples, + | the value added is the distance between its two neighbours over the normalisation + | value, which is a multiple of the (max - min) over all values + + TODO: Is there not a better algorithm than this? The crowding + distance seems very dependant on alignment with axes? + """ + # make sure we have the right datatype + if not isinstance(positions, np.ndarray): + positions = np.array(positions, dtype='float64') + return _get_crowding_distances(positions) + + +@optional_njit() +def _get_crowding_distances(positions: np.ndarray) -> np.ndarray: + # get shape + assert positions.ndim == 2 + N, F = positions.shape + # exit early + if N == 0: + return np.zeros(0, dtype='float64') + # store for the values + distances = np.zeros(N, dtype='float64') + # 1. for each fitness component, update the distance for each member! + for crowd in positions.T: + # 2. sort in increasing order + crowd_idxs = np.argsort(crowd) + crowd = crowd[crowd_idxs] + # 3. update endpoint distances + distances[crowd_idxs[0]] = np.inf + distances[crowd_idxs[-1]] = np.inf + # get endpoints + m = crowd[0] + M = crowd[-1] + # skip if the endpoints are the same (values will be zero if this is the case) + # or if there are not enough elements to compute over consecutive triples + if (M == m) or (len(crowd) < 3): + continue + # normalize the values between the maximum and minimum distances + # NOTE: the original NSGA-II paper does not apply this normalization constant... + norm = F * (M - m) + # 4. compute the distance as the difference between the previous + # and next values all over the normalize distance + distances[crowd_idxs[1:-1]] += (crowd[2:] - crowd[:-2]) / norm + # return the distances! + return distances + + +# ========================================================================= # +# END # +# ========================================================================= # diff --git a/ruck/util/_array.py b/ruck/util/_array.py new file mode 100644 index 0000000..c2c0e0c --- /dev/null +++ b/ruck/util/_array.py @@ -0,0 +1,80 @@ +# ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ +# MIT License +# +# Copyright (c) 2021 Nathan Juraj Michlo +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ + + +from typing import Sequence +from typing import Union + +import numpy as np + + +# ========================================================================= # +# Array Util # +# ========================================================================= # + + +def arggroup( + numbers: Union[Sequence, np.ndarray], + axis=0, + keep_order=True, + return_unique: bool = False, + return_index: bool = False, + return_counts: bool = False, +): + """ + Group all the elements of the array. + - The returned groups contain the indices of + the original position in the arrays. + """ + + # convert + if not isinstance(numbers, np.ndarray): + numbers = np.array(numbers) + # checks + if numbers.ndim == 0: + raise ValueError('input array must have at least one dimension') + if numbers.size == 0: + return [] + # we need to obtain the sorted groups of + unique, index, inverse, counts = np.unique(numbers, return_index=True, return_inverse=True, return_counts=True, axis=axis) + # same as [ary[:idx[0]], ary[idx[0]:idx[1]], ..., ary[idx[-2]:idx[-1]], ary[idx[-1]:]] + groups = np.split(ary=np.argsort(inverse, axis=0), indices_or_sections=np.cumsum(counts)[:-1], axis=0) + # maintain original order + if keep_order: + add_order = index.argsort() # the order that items were added in + groups = [groups[i] for i in add_order] + # return values + results = [groups] + if return_unique: results.append(unique[add_order] if keep_order else unique) + if return_index: results.append(index[add_order] if keep_order else index) + if return_counts: results.append(counts[add_order] if keep_order else counts) + # unpack + if len(results) == 1: + return results[0] + return results + + +# ========================================================================= # +# END # +# ========================================================================= # diff --git a/ruck/util/_population.py b/ruck/util/_population.py new file mode 100644 index 0000000..72d9e3a --- /dev/null +++ b/ruck/util/_population.py @@ -0,0 +1,68 @@ +# ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ +# MIT License +# +# Copyright (c) 2021 Nathan Juraj Michlo +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ + +from typing import Sequence +import numpy as np +from ruck import Population + + +# ========================================================================= # +# Population Helper # +# ========================================================================= # + + +def population_fitnesses(population: Population, weights: Sequence[float] = None) -> np.ndarray: + """ + Obtain an array of normalized fitness values from a population, the output + shape always has two dimensions. (len(population), len(fitness)) + - Fitness values have ndim==0 or ndim==1 and are always expanded to ndim=1 + + If weights are specified then we multiply by the normalized weights too. + - Weights have ndim==0 or ndim==1 and are broadcast to match the fitness values. + """ + fitnesses = np.array([m.fitness for m in population]) + # check dims + if fitnesses.ndim == 1: + fitnesses = fitnesses[:, None] + assert fitnesses.ndim == 2 + # exit early + if fitnesses.size == 0: + return fitnesses + # handle weights + if weights is not None: + weights = np.array(weights) + # check dims + if weights.ndim == 0: + weights = weights[None] + assert weights.ndim == 1 + # multiply + fitnesses *= weights[None, :] + assert fitnesses.ndim == 2 + # done + return fitnesses # shape: (len(population), len(fitness)) + + +# ========================================================================= # +# END # +# ========================================================================= # diff --git a/setup.py b/setup.py index f713c5b..f232edc 100644 --- a/setup.py +++ b/setup.py @@ -48,7 +48,7 @@ author="Nathan Juraj Michlo", author_email="NathanJMichlo@gmail.com", - version="0.2.3", + version="0.2.4", python_requires=">=3.6", packages=setuptools.find_packages(), diff --git a/tests/test_nsga2.py b/tests/test_nsga2.py new file mode 100644 index 0000000..e7cc2a3 --- /dev/null +++ b/tests/test_nsga2.py @@ -0,0 +1,81 @@ +# ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ +# MIT License +# +# Copyright (c) 2021 Nathan Juraj Michlo +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ + + +import numpy as np +import pytest + +from ruck import Member +from ruck.functional import select_nsga2 as select_nsga2_ruck +from ruck.external.deap import select_nsga2 as select_nsga2_deap + + +# ========================================================================= # +# TEST # +# ========================================================================= # + + +@pytest.mark.parametrize(['population_size', 'sel_num', 'fitness_size', 'weights'], [ + # basic + (0, 0, 2, (1, 1)), + (1, 0, 2, (1, 1)), + # (0, 1, 2, (1, 1)), + (1, 1, 2, (1, 1)), + # larger + (10, 0, 2, (1, 1)), + (10, 1, 2, (1, 1)), + (10, 5, 2, (1, 1)), + (10, 9, 2, (1, 1)), + (10, 10, 2, (1, 1)), + # (10, 11, 2, (1, 1)), + # (10, 20, 2, (1, 1)), + # weights + (10, 5, 2, ( 1, 1)), + (10, 5, 2, (-1, 1)), + (10, 5, 2, ( 1, -1)), + (10, 5, 2, (-1, -1)), + (10, 5, 3, (1, -1, 1)), + (10, 5, 4, (1, -1, 1, -1)), + (10, 5, 1, (1,)), + (10, 5, 1, (-1,)), +]) +def test(population_size, sel_num, fitness_size, weights): + np.random.seed(42) + # generate population + population = [ + Member(i, fitness=tuple(np.random.randint(5, size=fitness_size))) + for i in range(population_size) + ] + # select + sel_deap = select_nsga2_deap(population, sel_num, weights) + sel_ruck = select_nsga2_ruck(population, sel_num, weights) + # checks + assert [m.value for m in sel_deap] == [m.value for m in sel_ruck] + assert [m.fitness for m in sel_deap] == [m.fitness for m in sel_ruck] + assert sel_deap == sel_ruck + + +# ========================================================================= # +# END # +# ========================================================================= # diff --git a/tests/test_util_array.py b/tests/test_util_array.py new file mode 100644 index 0000000..0de713f --- /dev/null +++ b/tests/test_util_array.py @@ -0,0 +1,73 @@ +# ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ +# MIT License +# +# Copyright (c) 2021 Nathan Juraj Michlo +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# ~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~=~ + +import numpy as np +import pytest + +from ruck.util._array import arggroup + + +# ========================================================================= # +# TEST # +# ========================================================================= # + + +def _assert_groups_equal(groups, targets): + groups = [np.array(g).tolist() for g in groups] + targets = [np.array(t).tolist() for t in targets] + assert groups == targets + + +def test_arggroup_axis(): + numbers = [[2, 0], [2, 2], [0, 0], [2, 1], [2, 2], [2, 2], [0, 2], [1, 0], [1, 1], [1, 1]] + targets = [[2], [6], [7], [8, 9], [0], [3], [1, 4, 5]] + targets_orig_order = [[0], [1, 4, 5], [2], [3], [6], [7], [8, 9]] + # check that transposing everything works! + _assert_groups_equal(arggroup(numbers, axis=0), targets_orig_order) + _assert_groups_equal(arggroup(np.array(numbers), axis=0), targets_orig_order) + _assert_groups_equal(arggroup(np.array(numbers).T.tolist(), axis=1), targets_orig_order) + _assert_groups_equal(arggroup(np.array(numbers).T, axis=1), targets_orig_order) + # check that transposing everything works! + _assert_groups_equal(arggroup(numbers, axis=0, keep_order=False), targets) + _assert_groups_equal(arggroup(np.array(numbers), axis=0, keep_order=False), targets) + _assert_groups_equal(arggroup(np.array(numbers).T.tolist(), axis=1, keep_order=False), targets) + _assert_groups_equal(arggroup(np.array(numbers).T, axis=1, keep_order=False), targets) + + +def test_arggroup(): + _assert_groups_equal(arggroup([]), []) + _assert_groups_equal(arggroup(np.zeros([0])), []) + _assert_groups_equal(arggroup(np.zeros([0, 0])), []) + _assert_groups_equal(arggroup(np.zeros([1, 0])), []) + _assert_groups_equal(arggroup(np.zeros([0, 1])), []) + _assert_groups_equal(arggroup([1, 2, 3, 3]), [[0], [1], [2, 3]]) + _assert_groups_equal(arggroup([3, 2, 1, 3]), [[0, 3], [1], [2]]) + # check ndim=0 + with pytest.raises(ValueError, match=r'input array must have at least one dimension'): arggroup(0) + with pytest.raises(ValueError, match=r'input array must have at least one dimension'): arggroup(np.zeros([])) + + +# ========================================================================= # +# END # +# ========================================================================= #