Added normalisations and unit test cases

Nike-Inc · Nov 24, 2024 · 8b5fbd6
1 parent 851e816
commit 8b5fbd6
Show file tree

Hide file tree

Showing 13 changed files with 10 additions and 54 deletions.
diff --git a/src/koheesio/clipping_test.py b/src/koheesio/clipping_test.py
@@ -17,23 +17,20 @@ def test_clip_data_beyond_limits(self):
         self.assertEqual(result, expected)
 
     def test_clip_data_all_below_min(self):
-        """Test clipping where all values are below the minimum."""
         data = [-10, -5, -1]
         config = ClipConfig(min_value=0, max_value=20)
         result = clip_data(data, config)
         expected = [0, 0, 0]
         self.assertEqual(result, expected)
 
     def test_clip_data_all_above_max(self):
-        """Test clipping where all values are above the maximum."""
         data = [25, 30, 35]
         config = ClipConfig(min_value=10, max_value=20)
         result = clip_data(data, config)
         expected = [20, 20, 20]
         self.assertEqual(result, expected)
 
     def test_clip_data_empty_list(self):
-        """Test clipping with an empty data list."""
         data = []
         config = ClipConfig(min_value=10, max_value=20)
         result = clip_data(data, config)

diff --git a/src/koheesio/exponential.py b/src/koheesio/exponential.py
@@ -2,7 +2,7 @@
 from pydantic import BaseModel
 
 class ExpTransformConfig(BaseModel):
-    base: float  # Base of the exponential function
+    base: float  
 
 def exp_transform(data: List[float], config: ExpTransformConfig) -> List[float]:
     import math

diff --git a/src/koheesio/exponential_test.py b/src/koheesio/exponential_test.py
@@ -13,25 +13,21 @@ def test_exp_transform_base_e(self):
         data = [1, 2, 3]
         config = ExpTransformConfig(base=2.71828)
         result = exp_transform(data, config)
-        # Just check it works; e-based calculations vary
         self.assertTrue(all(isinstance(x, float) for x in result))
 
     def test_exp_transform_large_values(self):
-        """Test exponential transformation with large values."""
         data = [10, 20]
         config = ExpTransformConfig(base=10)
         result = exp_transform(data, config)
         self.assertTrue(all(isinstance(x, float) for x in result))
 
     def test_exp_transform_with_invalid_base(self):
-        """Test exponential transformation with invalid base."""
         data = [1, 2, 3]
         config = ExpTransformConfig(base=-1)
         with self.assertRaises(ValueError):
             exp_transform(data, config)
 
     def test_exp_transform_empty_data(self):
-        """Test exponential transformation with an empty data list."""
         data = []
         config = ExpTransformConfig(base=2)
         result = exp_transform(data, config)

diff --git a/src/koheesio/min_max.py b/src/koheesio/min_max.py
@@ -1,11 +1,10 @@
 from typing import List
 
 def min_max_normalize(data: List[float], new_min: float, new_max: float) -> List[float]:
-    """Normalize data to a specific range [new_min, new_max] using Min-Max scaling."""
     if not data:
         raise ValueError("Data list cannot be empty.")
     min_data = min(data)
     max_data = max(data)
     if min_data == max_data:
-        return [new_min for _ in data]  # All values are the same; map to new_min
+        return [new_min for _ in data]  
     return [(new_min + (x - min_data) * (new_max - new_min) / (max_data - min_data)) for x in data]
diff --git a/src/koheesio/min_max_test.py b/src/koheesio/min_max_test.py
@@ -11,7 +11,7 @@ def test_standard_case(self):
     def test_identical_values(self):
         data = [10, 10, 10]
         result = min_max_normalize(data, 0, 1)
-        expected = [0.0, 0.0, 0.0]  # All values map to new_min
+        expected = [0.0, 0.0, 0.0]  
         self.assertEqual(result, expected)
 
     def test_negative_values(self):
@@ -25,10 +25,9 @@ def test_empty_data(self):
             min_max_normalize([], 0, 1)
 
     def test_negative_target_range(self):
-        """Test normalization with a negative target range."""
         data = [10, 20, 30, 40, 50]
         result = min_max_normalize(data, -5, -1)
-        expected = [-5.0, -4.0, -3.0, -2.0, -1.0]  # Map to the range [-5, -1]
+        expected = [-5.0, -4.0, -3.0, -2.0, -1.0]  
         self.assertEqual(result, expected)
 
 if __name__ == "__main__":

diff --git a/src/koheesio/normalize.py b/src/koheesio/normalize.py
@@ -6,17 +6,13 @@ class NormalizeConfig(BaseModel):
 
 def normalize(data, config: NormalizeConfig):
 
-    # Calculate the minimum and maximum values of the input data
     min_data = min(data)
     max_data = max(data)
 
-    # Calculate the range of the input data
     range_data = max_data - min_data
 
-    # Calculate the range of the target normalization
     range_config = config.max_value - config.min_value
 
-    # Normalize each value in the data
     normalized_data = [
         ((value - min_data) / range_data) * range_config + config.min_value
         for value in data

diff --git a/src/koheesio/scale.py b/src/koheesio/scale.py
@@ -1,16 +1,7 @@
 from typing import List
 
 def scale_data(data: List[float], multiplier: float) -> List[float]:
-    """
-    Scales a list of numbers by the given multiplier.
-
-    Args:
-        data (List[float]): A list of numerical data.
-        multiplier (float): The multiplier to scale each number.
-
-    Returns:
-        List[float]: A list of scaled numbers.
-    """
+
     if not isinstance(multiplier, (int, float)):
         raise ValueError("Multiplier must be a numeric value.")
     if not all(isinstance(num, (int, float)) for num in data):

diff --git a/src/koheesio/scale_test.py b/src/koheesio/scale_test.py
@@ -4,15 +4,13 @@
 class TestScaleData(unittest.TestCase):
 
     def test_scale_positive_numbers(self):
-        """Test scaling a list of positive numbers."""
         data = [1, 2, 3, 4, 5]
         multiplier = 2
         result = scale_data(data, multiplier)
         expected = [2, 4, 6, 8, 10]
         self.assertEqual(result, expected)
 
     def test_scale_negative_numbers(self):
-        """Test scaling a list of negative numbers."""
         data = [-1, -2, -3, -4, -5]
         multiplier = 3
         result = scale_data(data, multiplier)
@@ -28,23 +26,20 @@ def test_scale_negative_numbers(self):
         self.assertEqual(result, expected)
 
     def test_scale_with_zero_multiplier(self):
-        """Test scaling numbers with a multiplier of zero."""
         data = [10, -10, 100]
         multiplier = 0
         result = scale_data(data, multiplier)
         expected = [0, 0, 0]
         self.assertEqual(result, expected)
 
     def test_scale_with_one_multiplier(self):
-        """Test scaling numbers with a multiplier of one (should return the same list)."""
         data = [1.5, 2.5, 3.5]
         multiplier = 1
         result = scale_data(data, multiplier)
         expected = [1.5, 2.5, 3.5]
         self.assertEqual(result, expected)
 
     def test_scale_empty_list(self):
-        """Test scaling an empty list."""
         data = []
         multiplier = 2
         result = scale_data(data, multiplier)

diff --git a/src/koheesio/standardisation.py b/src/koheesio/standardisation.py
@@ -3,8 +3,8 @@
 import statistics
 
 class StandardizeConfig(BaseModel):
-    mean: float = None  # Optional precomputed mean
-    std_dev: float = None  # Optional precomputed standard deviation
+    mean: float = None  
+    std_dev: float = None  
 
 def standardize(data: List[float], config: StandardizeConfig) -> List[float]:
     mean = config.mean if config.mean is not None else statistics.mean(data)

diff --git a/src/koheesio/standardize_test.py b/src/koheesio/standardize_test.py
@@ -10,35 +10,31 @@ def test_standardize_identical_values(self):
             standardize(data, config)
 
     def test_standardize_with_precomputed_values(self):
-        """Test standardization with precomputed mean and standard deviation."""
         data = [10, 20, 30]
         config = StandardizeConfig(mean=20, std_dev=10)
         result = standardize(data, config)
         expected = [-1.0, 0.0, 1.0]
         self.assertEqual(result, expected)
 
     def test_standardize_empty_data(self):
-        """Test standardization with an empty data list."""
         data = []
         config = StandardizeConfig()
         with self.assertRaises(ValueError):
             standardize(data, config)
 
 
     def test_standardize_with_custom_mean_and_std_dev(self):
-        """Test standardization with a custom mean and standard deviation."""
         data = [1, 2, 3, 4, 5]
         config = StandardizeConfig(mean=3, std_dev=1)
         result = standardize(data, config)
         expected = [-2.0, -1.0, 0.0, 1.0, 2.0]
         self.assertEqual(result, expected)
 
     def test_standardize_with_small_variance(self):
-        """Test standardization with a dataset having a small variance."""
         data = [1.001, 1.002, 1.003, 1.004, 1.005]
         config = StandardizeConfig()
         result = standardize(data, config)
-        self.assertAlmostEqual(sum(result), 0, places=5)  # The sum of standardized values should be near 0
+        self.assertAlmostEqual(sum(result), 0, places=5)  
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/src/koheesio/test.py b/src/koheesio/test.py
@@ -5,44 +5,38 @@
 class TestNormalize(unittest.TestCase):
 
     def test_normalize_standard(self):
-        """Test normalization with standard positive integers."""
         data = [10, 20, 30, 40, 50]
         config = NormalizeConfig(min_value=0, max_value=1)
         result = normalize(data, config)
         expected = [0.0, 0.25, 0.5, 0.75, 1.0]
         self.assertEqual(result, expected)
 
     def test_normalize_negative_values(self):
-        """Test normalization with negative values."""
         data = [-50, -25, 0, 25, 50]
         config = NormalizeConfig(min_value=-1, max_value=1)
         result = normalize(data, config)
         expected = [-1.0, -0.5, 0.0, 0.5, 1.0]
         self.assertEqual(result, expected)
 
     def test_normalize_float_values(self):
-        """Test normalization with floating-point numbers."""
         data = [0.1, 0.2, 0.3, 0.4, 0.5]
         config = NormalizeConfig(min_value=0, max_value=1)
         result = normalize(data, config)
         expected = [0.0, 0.25, 0.49999999999999994, 0.7500000000000001, 1.0]
         self.assertEqual(result, expected)
 
     def test_normalize_inverted_config(self):
-        """Test normalization when config min_value is greater than max_value."""
         data = [10, 20, 30, 40, 50]
         config = NormalizeConfig(min_value=1, max_value=0)
         result = normalize(data, config)
         expected = [1.0, 0.75, 0.5, 0.25, 0.0]
         self.assertEqual(result, expected)
 
     def test_normalize_empty_data(self):
-        """Test normalization with an empty data list."""
         data = []
         config = NormalizeConfig(min_value=0, max_value=1)
         with self.assertRaises(ValueError):
             normalize(data, config)
 
 if __name__ == '__main__':
-
     unittest.main()
diff --git a/src/koheesio/z_score_normalize.py b/src/koheesio/z_score_normalize.py
@@ -2,11 +2,10 @@
 import statistics
 
 def z_score_normalize(data: List[float]) -> List[float]:
-    """Normalize data using Z-Score normalization (mean = 0, std deviation = 1)."""
     if not data:
         raise ValueError("Data list cannot be empty.")
     mean = statistics.mean(data)
     std_dev = statistics.stdev(data)
     if std_dev == 0:
-        return [0.0 for _ in data]  # All values are identical; z-score is 0
+        return [0.0 for _ in data]  
     return [(x - mean) / std_dev for x in data]
diff --git a/src/koheesio/z_score_test.py b/src/koheesio/z_score_test.py
@@ -4,21 +4,18 @@
 class TestZScoreNormalize(unittest.TestCase):
 
     def test_large_numbers(self):
-        """Test z-score normalization with large numbers."""
         data = [1e10, 2e10, 3e10, 4e10, 5e10]
         result = z_score_normalize(data)
         expected = [-1.2649110640673518, -0.6324555320336759, 0.0, 0.6324555320336759, 1.2649110640673518]
         self.assertAlmostEqual(result, expected, places=5)
 
     def test_mixed_positive_and_negative_values(self):
-        """Test z-score normalization with a mix of positive and negative values."""
         data = [-10, -5, 0, 5, 10]
         result = z_score_normalize(data)
         expected = [-1.2649110640673518, -0.6324555320336759, 0.0, 0.6324555320336759, 1.2649110640673518]
         self.assertAlmostEqual(result, expected, places=5)
 
     def test_precomputed_mean_and_std_dev(self):
-        """Test z-score normalization with precomputed mean and std deviation."""
         from statistics import mean, stdev
         data = [10, 20, 30, 40, 50]
         mean_value = mean(data)
@@ -29,18 +26,15 @@ def test_precomputed_mean_and_std_dev(self):
 
 
     def test_data_with_highly_skewed_distribution(self):
-        """Test z-score normalization with a highly skewed distribution."""
         data = [1, 2, 3, 4, 1000]
         result = z_score_normalize(data)
-        # Validate that the outlier affects the z-scores
         self.assertTrue(result[-1] > 1.0)
         self.assertTrue(result[0] < 0.0)
 
     def test_already_normalized_data(self):
-        """Test z-score normalization on already normalized data."""
         data = [-1.2649110640673518, -0.6324555320336759, 0.0, 0.6324555320336759, 1.2649110640673518]
         result = z_score_normalize(data)
-        expected = data  # Z-score normalizing already normalized data should not change it
+        expected = data 
         self.assertAlmostEqual(result, expected, places=5)
 
 if __name__ == "__main__":