From ae82af2f9ede197b5a7dbfa51a7b493025d42f2d Mon Sep 17 00:00:00 2001 From: William Song <30965609+Freakwill@users.noreply.github.com> Date: Fri, 8 Dec 2023 14:37:08 +0800 Subject: [PATCH] learn --- pyrimidine/learn/__init__.py | 18 ++++- pyrimidine/learn/neural_network.py | 71 ++++++++----------- pyrimidine/learn/regression.py | 106 ----------------------------- tests/test_learn.py | 17 ++++- 4 files changed, 60 insertions(+), 152 deletions(-) delete mode 100755 pyrimidine/learn/regression.py diff --git a/pyrimidine/learn/__init__.py b/pyrimidine/learn/__init__.py index 5bfb5d7..90d5afd 100755 --- a/pyrimidine/learn/__init__.py +++ b/pyrimidine/learn/__init__.py @@ -1,7 +1,21 @@ #!/usr/bin/env python3 -from .base import BaseEstimator +from sklearn.base import BaseEstimator as BE import warnings, os warnings.filterwarnings("ignore") -os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' \ No newline at end of file +os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' + + +class BaseEstimator(BE): + + @classmethod + def config(cls, X, Y, *args, **kwargs): + raise NotImplementedError + + def fit(self, X, Y, pop=None, warm_start=False): + if warm_start: + self.pop = pop or self.pop or self.config(X, Y) + else: + self.pop = pop or self.config(X, Y) + self._fit(X, Y) \ No newline at end of file diff --git a/pyrimidine/learn/neural_network.py b/pyrimidine/learn/neural_network.py index a50e3f8..26013e3 100644 --- a/pyrimidine/learn/neural_network.py +++ b/pyrimidine/learn/neural_network.py @@ -5,81 +5,66 @@ from scipy.special import softmax from scipy.stats import entropy -from keras.models import Sequential -from keras.layers import Dense -from sklearn.metrics import r2_score +from sklearn.neural_network import MLPRegressor -from pyrimidine import MixedIndividual, FloatChromosome, FloatMatrixChromosome -from pyrimidine.population import HOFPopulation -from pyrimidine.learn.base import BaseEstimator +from .. import MixedIndividual, FloatChromosome, FloatMatrixChromosome +from ..population import HOFPopulation +from ..learn import BaseEstimator -class GAANN(BaseEstimator, Sequential): +class GAANN(BaseEstimator, MLPRegressor): """GA for ANN """ pop = None + hidden_dim = 4 max_iter = 100 + n_layers = 3 @classmethod - def create_model(cls, input_dim, output_dim): - # create Sequential object - hidden_dim = 4 - model = Sequential() - model.add(Dense(hidden_dim, activation='relu', input_dim=input_dim)) - model.add(Dense(output_dim)) + def create_model(cls, *args, **kwargs): + # create MLPRegressor object + model = MLPRegressor(hidden_layer_sizes=(cls.hidden_dim,), max_iter=1, *args, **kwargs) + model.out_activation_ = 'identity' + model.n_layers_ = cls.n_layers return model - @classmethod - def create(cls, input_dim, output_dim): + def __init__(self, *args, **kwargs): # create GAANN object - hidden_dim = 4 - model = cls() - model.add(Dense(hidden_dim, activation='relu', input_dim=input_dim)) - model.add(Dense(output_dim)) - return model + super().__init__(hidden_layer_sizes=(self.hidden_dim,), max_iter=1, *args, **kwargs) + self.out_activation_ = 'identity' + self.n_layers_ = 3 @classmethod - def from_data(cls, X, Y): - input_dim = X.shape[1] - output_dim = Y.shape[1] - return cls.create(input_dim, output_dim) + def config(cls, X, Y, n_individuals=10, *args, **kwargs): + # configure the population for GA - @classmethod - def config(cls, X, Y, n_individuals=10): - hidden_dim = 4 input_dim = X.shape[1] output_dim = Y.shape[1] class MyIndividual(MixedIndividual): - element_class = FloatMatrixChromosome, FloatChromosome, FloatChromosome, FloatChromosome + element_class = FloatMatrixChromosome, FloatChromosome, FloatMatrixChromosome, FloatChromosome def _fitness(self): model = self.decode() - return r2_score(Y, model.predict(X)) + return model.score(X, Y) def decode(self): - model = cls.create_model(input_dim, output_dim) - for k, layer in enumerate(model.layers): - weights = (self.chromosomes[2*k], self.chromosomes[2*k+1]) - layer.set_weights(weights) + model = cls.create_model(*args, **kwargs) + model.coefs_ = tuple(map(np.asarray, self[::2])) + model.intercepts_ = tuple(map(np.asarray, self[1::2])) + model.n_layers_ = 3 return model MyPopulation = HOFPopulation[MyIndividual] - return MyPopulation.random(n_individuals=n_individuals, size=((input_dim, hidden_dim), hidden_dim, (hidden_dim, output_dim), output_dim)) - - def predict(self, X): - return super().predict(X, verbose=0) + return MyPopulation.random(n_individuals=n_individuals, size=((input_dim, cls.hidden_dim), cls.hidden_dim, (cls.hidden_dim, output_dim), output_dim)) - def fit(self, X, Y, pop=None): - self.pop = pop or self.pop or self.config(X, Y) + def _fit(self, X, Y): self.pop.ezolve(n_iter=self.max_iter) model_ = self.pop.solution - self.set_weights(model_.get_weights()) - - def score(self, X, Y): - return r2_score(Y, self.predict(X)) + self.coefs_ = model_.coefs_ + self.intercepts_ = model_.intercepts_ diff --git a/pyrimidine/learn/regression.py b/pyrimidine/learn/regression.py deleted file mode 100755 index a5a9c06..0000000 --- a/pyrimidine/learn/regression.py +++ /dev/null @@ -1,106 +0,0 @@ -#!/usr/bin/env python3 - - -from pyrimidine import * -from digit_converter import IntervalConverter - -c = IntervalConverter(lb=-60, ub=60) -class _BinaryChromosome(BinaryChromosome): - def decode(self): - return c(self) - -c = IntervalConverter(lb=0, ub=5) -class _BinaryChromosome2(BinaryChromosome): - def decode(self): - return c(self) - -import numpy as np -import numpy.linalg as LA -from sklearn.linear_model import * -from pyrimidine.learn import BaseEstimator - -class GALinearRegression(BaseEstimator, LinearRegression): - '''Linear Regression - - solve Xp = y, with min_p ||Xp-y|| + a||p||, a>=0 - ''' - - alpha = 0.05 # Regularization strength - - def postprocess(self): - self.coef_ = self.best.chromosomes[0] - self.intercept_ = self.best.chromosomes[1].decode() - - def config(self, X, y): - class MyIndividual(SelfAdaptiveIndividual): - params={'sigma':0.02} - element_class = FloatChromosome, _BinaryChromosome, _BinaryChromosome2, FloatChromosome - - @property - def sigma(self): - return self.chromosomes[2].decode() - - def mutate(self, copy=False): - self.fitness = None - for chromosome in self.chromosomes[1:]: - chromosome.mutate() - self.chromosomes[0].mutate(sigma=self.sigma) - return self - - def _fitness(self): - coef = self.chromosomes[0] - intercept = self.chromosomes[1].decode() - return - LA.norm(X @ coef +intercept - y) - GALinearRegression.alpha * LA.norm(coef, 1) - - class MyPopulation(SGA2Population): - element_class = MyIndividual - - pop = MyPopulation.random(n_individuals=100, sizes=(11, 12, 10, 2)) - return pop - - - def perf(self, n=10, *args, **kwargs): - """Check the performance by running it several times - - Arguments: - n {int} -- running times - - Returns: - number -- mean time - """ - import time - times = [] - for _ in range(n): - time1 = time.perf_counter() - self.fit(*args, **kwargs) - time2 = time.perf_counter() - times.append(time2 - time1) - return np.mean(times) - -if __name__ == '__main__': - def rel_error(y, t, m=None): - if m is None: - m = t.mean() - return LA.norm(t - y) / LA.norm(t) - - import pandas as pd - from sklearn.model_selection import train_test_split - data = pd.read_csv('~/Folders/Database/winequality.csv') - keys = data.columns - A = data[keys[:-1]].values # the rest is input - B = data[keys[-1]].values # the last colunm is ouput - A, A_test, B, B_test = train_test_split(A, B, test_size=0.3) - r = GALinearRegression() - r.fit(A, B) - print(f''' -coef_: {r.coef_} -intercept_: {r.intercept_} -train error: {r.score(A, B)} -test Error: {r.score(A_test, B_test)}''') - r = LinearRegression() - r.fit(A, B) - print(f''' -coef_: {r.coef_} -intercept_: {r.intercept_} -train error: {r.score(A, B)} -test Error: {r.score(A_test, B_test)}''') diff --git a/tests/test_learn.py b/tests/test_learn.py index 59c91bc..5e7fdbb 100644 --- a/tests/test_learn.py +++ b/tests/test_learn.py @@ -4,16 +4,31 @@ import numpy as np from pyrimidine.learn.neural_network import GAANN +from pyrimidine.learn.regression import GALinearRegression def test_ann(): X = np.array([[0,0], [0,1], [1,0], [1,1]]) Y = np.array([[0,1], [1,0], [1,0], [0,1]]) - model = GAANN.from_data(X, Y) + model = GAANN() pop = GAANN.config(X, Y) s0 = pop.fitness model.max_iter = 2 model.fit(X, Y, pop) s1 = model.score(X, Y) assert s0 <= s1 + +def test_lr(): + X = np.array([[0,0], [0,1], [1,0], [1,1]]) + Y = np.array([2,1,0,0]) + + model = GALinearRegression() + pop = GALinearRegression.config(X, Y) + for i in pop: + i._fitness() + s0 = pop.fitness + model.max_iter = 2 + model.fit(X, Y, pop) + s1 = model.score(X, Y) + assert s0 <= s1