From ae82af2f9ede197b5a7dbfa51a7b493025d42f2d Mon Sep 17 00:00:00 2001
From: William Song <30965609+Freakwill@users.noreply.github.com>
Date: Fri, 8 Dec 2023 14:37:08 +0800
Subject: [PATCH] learn

---
 pyrimidine/learn/__init__.py       |  18 ++++-
 pyrimidine/learn/neural_network.py |  71 ++++++++-----------
 pyrimidine/learn/regression.py     | 106 -----------------------------
 tests/test_learn.py                |  17 ++++-
 4 files changed, 60 insertions(+), 152 deletions(-)
 delete mode 100755 pyrimidine/learn/regression.py

diff --git a/pyrimidine/learn/__init__.py b/pyrimidine/learn/__init__.py
index 5bfb5d7..90d5afd 100755
--- a/pyrimidine/learn/__init__.py
+++ b/pyrimidine/learn/__init__.py
@@ -1,7 +1,45 @@
 #!/usr/bin/env python3
 
-from .base import BaseEstimator
+from sklearn.base import BaseEstimator as BE
 
 import warnings, os
 warnings.filterwarnings("ignore")
-os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
\ No newline at end of file
+os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
+
+
+class BaseEstimator(BE):
+    """Base class of the estimators trained by a genetic algorithm.
+
+    `fit` selects a population and hands over to `_fit`, which a
+    subclass is expected to provide together with `config`.
+    """
+
+    @classmethod
+    def config(cls, X, Y, *args, **kwargs):
+        """Create the population for the data (X, Y).
+
+        Raises:
+            NotImplementedError -- always; subclasses override it
+        """
+        raise NotImplementedError
+
+    def fit(self, X, Y, pop=None, warm_start=False):
+        """Choose the population, then train with `self._fit(X, Y)`.
+
+        Arguments:
+            X, Y -- training data, forwarded to `config` and `_fit`
+
+        Keyword Arguments:
+            pop -- population to use; a falsy value means "not given"
+            warm_start {bool} -- fall back to the current `self.pop`
+                before creating a new population (default: {False})
+
+        Returns:
+            the estimator itself
+        """
+        if warm_start and not pop:
+            # `pop` is not declared on this class, so it may be missing
+            pop = getattr(self, 'pop', None)
+        self.pop = pop or self.config(X, Y)
+        self._fit(X, Y)
+        return self
diff --git a/pyrimidine/learn/neural_network.py b/pyrimidine/learn/neural_network.py
index a50e3f8..26013e3 100644
--- a/pyrimidine/learn/neural_network.py
+++ b/pyrimidine/learn/neural_network.py
@@ -5,81 +5,94 @@
 from scipy.special import softmax
 from scipy.stats import entropy
 
-from keras.models import Sequential
-from keras.layers import Dense
-from sklearn.metrics import r2_score
+from sklearn.neural_network import MLPRegressor
 
-from pyrimidine import MixedIndividual, FloatChromosome, FloatMatrixChromosome
-from pyrimidine.population import HOFPopulation
-from pyrimidine.learn.base import BaseEstimator
+from .. import MixedIndividual, FloatChromosome, FloatMatrixChromosome
+from ..population import HOFPopulation
+from ..learn import BaseEstimator
 
 
-class GAANN(BaseEstimator, Sequential):
+class GAANN(BaseEstimator, MLPRegressor):
     """GA for ANN
     """
 
     pop = None
 
+    # width of the single hidden layer
+    hidden_dim = 4
+    # number of GA iterations performed by `_fit`
     max_iter = 100
+    # value for `n_layers_`: input + one hidden + output layer
+    n_layers = 3
 
     @classmethod
-    def create_model(cls, input_dim, output_dim):
-        # create Sequential object
-        hidden_dim = 4
-        model = Sequential()
-        model.add(Dense(hidden_dim, activation='relu', input_dim=input_dim))
-        model.add(Dense(output_dim))
+    def create_model(cls, *args, **kwargs):
+        """Create the MLPRegressor that carries the weights of an individual.
+
+        `out_activation_` and `n_layers_` are set by hand here because
+        this method never calls `fit` on the regressor.
+        """
+        # create MLPRegressor object
+        model = MLPRegressor(*args, hidden_layer_sizes=(cls.hidden_dim,), max_iter=1, **kwargs)
+        model.out_activation_ = 'identity'
+        model.n_layers_ = cls.n_layers
         return model
 
-    @classmethod
-    def create(cls, input_dim, output_dim):
+    def __init__(self, **kwargs):
         # create GAANN object
-        hidden_dim = 4
-        model = cls()
-        model.add(Dense(hidden_dim, activation='relu', input_dim=input_dim))
-        model.add(Dense(output_dim))
-        return model
+        # No varargs: scikit-learn's get_params()/repr()/clone() raise a
+        # RuntimeError for an estimator whose __init__ takes `*args`.
+        # `max_iter` is the number of GA iterations here; default to the class
+        # value instead of overwriting it with 1 on every instance.
+        kwargs.setdefault('max_iter', self.max_iter)
+        super().__init__(hidden_layer_sizes=(self.hidden_dim,), **kwargs)
+        self.out_activation_ = 'identity'
+        self.n_layers_ = self.n_layers
 
     @classmethod
-    def from_data(cls, X, Y):
-        input_dim = X.shape[1]
-        output_dim = Y.shape[1]
-        return cls.create(input_dim, output_dim)
+    def config(cls, X, Y, n_individuals=10, *args, **kwargs):
+        """Create a random population of candidate networks for (X, Y).
+
+        Arguments:
+            X, Y -- 2D arrays; their second dimensions give the input and
+                output sizes of the network
+
+        Keyword Arguments:
+            n_individuals {int} -- population size (default: {10})
+            *args, **kwargs -- forwarded to `create_model` when decoding
+
+        Returns:
+            the random population; individuals are scored by `model.score(X, Y)`
+        """
+        # configure the population for GA
 
-    @classmethod
-    def config(cls, X, Y, n_individuals=10):
-        hidden_dim = 4
         input_dim = X.shape[1]
         output_dim = Y.shape[1]
 
         class MyIndividual(MixedIndividual):
 
-            element_class = FloatMatrixChromosome, FloatChromosome, FloatChromosome, FloatChromosome
+            element_class = FloatMatrixChromosome, FloatChromosome, FloatMatrixChromosome, FloatChromosome
 
             def _fitness(self):
                 model = self.decode()
-                return r2_score(Y, model.predict(X))
+                return model.score(X, Y)
 
             def decode(self):
-                model = cls.create_model(input_dim, output_dim)
-                for k, layer in enumerate(model.layers):
-                    weights = (self.chromosomes[2*k], self.chromosomes[2*k+1])
-                    layer.set_weights(weights)
+                model = cls.create_model(*args, **kwargs)
+                # even chromosomes are weight matrices, odd ones are biases
+                model.coefs_ = tuple(map(np.asarray, self[::2]))
+                model.intercepts_ = tuple(map(np.asarray, self[1::2]))
+                model.n_layers_ = cls.n_layers
                 return model
 
         MyPopulation = HOFPopulation[MyIndividual]
 
-        return MyPopulation.random(n_individuals=n_individuals, size=((input_dim, hidden_dim), hidden_dim, (hidden_dim, output_dim), output_dim))
-
-    def predict(self, X):
-        return super().predict(X, verbose=0)
+        return MyPopulation.random(n_individuals=n_individuals, size=((input_dim, cls.hidden_dim), cls.hidden_dim, (cls.hidden_dim, output_dim), output_dim))
 
-    def fit(self, X, Y, pop=None):
-        self.pop = pop or self.pop or self.config(X, Y)
+    def _fit(self, X, Y):
+        """Evolve `self.pop` and copy the weights of its solution to self."""
         self.pop.ezolve(n_iter=self.max_iter)
         model_ = self.pop.solution
-        self.set_weights(model_.get_weights())
-
-    def score(self, X, Y):
-        return r2_score(Y, self.predict(X))
+        self.coefs_ = model_.coefs_
+        self.intercepts_ = model_.intercepts_
 
diff --git a/pyrimidine/learn/regression.py b/pyrimidine/learn/regression.py
deleted file mode 100755
index a5a9c06..0000000
--- a/pyrimidine/learn/regression.py
+++ /dev/null
@@ -1,106 +0,0 @@
-#!/usr/bin/env python3
-
-
-from pyrimidine import *
-from digit_converter import IntervalConverter
-
-c = IntervalConverter(lb=-60, ub=60)
-class _BinaryChromosome(BinaryChromosome):
-    def decode(self):
-        return c(self)
-
-c = IntervalConverter(lb=0, ub=5)
-class _BinaryChromosome2(BinaryChromosome):
-    def decode(self):
-        return c(self)
-
-import numpy as np
-import numpy.linalg as LA
-from sklearn.linear_model import *
-from pyrimidine.learn import BaseEstimator
-
-class GALinearRegression(BaseEstimator, LinearRegression):
-    '''Linear Regression
-
-    solve Xp = y, with min_p ||Xp-y|| + a||p||, a>=0
-    '''
-
-    alpha = 0.05 # Regularization strength
-
-    def postprocess(self):
-        self.coef_ = self.best.chromosomes[0]
-        self.intercept_ = self.best.chromosomes[1].decode()
-
-    def config(self, X, y):
-        class MyIndividual(SelfAdaptiveIndividual):
-            params={'sigma':0.02}
-            element_class = FloatChromosome, _BinaryChromosome, _BinaryChromosome2, FloatChromosome
-
-            @property
-            def sigma(self):
-                return self.chromosomes[2].decode()
-
-            def mutate(self, copy=False):
-                self.fitness = None
-                for chromosome in self.chromosomes[1:]:
-                        chromosome.mutate()
-                self.chromosomes[0].mutate(sigma=self.sigma)
-                return self
-
-            def _fitness(self):
-                coef = self.chromosomes[0]
-                intercept = self.chromosomes[1].decode()
-                return - LA.norm(X @ coef +intercept - y) - GALinearRegression.alpha * LA.norm(coef, 1)
-
-        class MyPopulation(SGA2Population):
-            element_class = MyIndividual
-
-        pop = MyPopulation.random(n_individuals=100, sizes=(11, 12, 10, 2))
-        return pop
-
-
-    def perf(self, n=10, *args, **kwargs):
-        """Check the performance by running it several times
-        
-        Arguments:
-            n {int} -- running times
-        
-        Returns:
-            number -- mean time
-        """
-        import time
-        times = []
-        for _ in range(n):
-            time1 = time.perf_counter()
-            self.fit(*args, **kwargs)
-            time2 = time.perf_counter()
-            times.append(time2 - time1)
-        return np.mean(times)
-
-if __name__ == '__main__':
-    def rel_error(y, t, m=None):
-        if m is None:
-            m = t.mean()
-        return LA.norm(t - y) / LA.norm(t)
-
-    import pandas as pd
-    from sklearn.model_selection import train_test_split
-    data = pd.read_csv('~/Folders/Database/winequality.csv')
-    keys = data.columns
-    A = data[keys[:-1]].values # the rest is input
-    B = data[keys[-1]].values  # the last colunm is ouput
-    A, A_test, B, B_test = train_test_split(A, B, test_size=0.3)
-    r = GALinearRegression()
-    r.fit(A, B)
-    print(f'''
-coef_: {r.coef_}
-intercept_: {r.intercept_}
-train error: {r.score(A, B)}
-test Error: {r.score(A_test, B_test)}''')
-    r = LinearRegression()
-    r.fit(A, B)
-    print(f'''
-coef_: {r.coef_}
-intercept_: {r.intercept_}
-train error: {r.score(A, B)}
-test Error: {r.score(A_test, B_test)}''')
diff --git a/tests/test_learn.py b/tests/test_learn.py
index 59c91bc..5e7fdbb 100644
--- a/tests/test_learn.py
+++ b/tests/test_learn.py
@@ -4,16 +4,40 @@
 import numpy as np
 
 from pyrimidine.learn.neural_network import GAANN
 
 
 def test_ann():
     X = np.array([[0,0], [0,1], [1,0], [1,1]])
     Y = np.array([[0,1], [1,0], [1,0], [0,1]])
 
-    model = GAANN.from_data(X, Y)
+    model = GAANN()
     pop = GAANN.config(X, Y)
     s0 = pop.fitness
     model.max_iter = 2
     model.fit(X, Y, pop)
     s1 = model.score(X, Y)
     assert s0 <= s1
+
+
+def test_lr():
+    # Imported lazily: this patch deletes pyrimidine/learn/regression.py, and a
+    # module-level import would make the whole file (test_ann too) fail at
+    # collection time.
+    try:
+        from pyrimidine.learn.regression import GALinearRegression
+    except ImportError:
+        from unittest import SkipTest
+        raise SkipTest('pyrimidine.learn.regression is not available')
+
+    X = np.array([[0,0], [0,1], [1,0], [1,1]])
+    Y = np.array([2,1,0,0])
+
+    model = GALinearRegression()
+    pop = GALinearRegression.config(X, Y)
+    for i in pop:
+        i._fitness()
+    s0 = pop.fitness
+    model.max_iter = 2
+    model.fit(X, Y, pop)
+    s1 = model.score(X, Y)
+    assert s0 <= s1