Skip to content

Commit

Permalink
Added normalisations and unit test cases
Browse files Browse the repository at this point in the history
  • Loading branch information
sritha272 committed Nov 24, 2024
1 parent 851e816 commit 8b5fbd6
Show file tree
Hide file tree
Showing 13 changed files with 10 additions and 54 deletions.
3 changes: 0 additions & 3 deletions src/koheesio/clipping_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,23 +17,20 @@ def test_clip_data_beyond_limits(self):
self.assertEqual(result, expected)

def test_clip_data_all_below_min(self):
    """Values that all sit under ``min_value`` come back as ``min_value``."""
    limits = ClipConfig(min_value=0, max_value=20)
    clipped = clip_data([-10, -5, -1], limits)
    self.assertEqual(clipped, [0, 0, 0])

def test_clip_data_all_above_max(self):
    """Values that all sit over ``max_value`` come back as ``max_value``."""
    limits = ClipConfig(min_value=10, max_value=20)
    clipped = clip_data([25, 30, 35], limits)
    self.assertEqual(clipped, [20, 20, 20])

def test_clip_data_empty_list(self):
"""Test clipping with an empty data list."""
data = []
config = ClipConfig(min_value=10, max_value=20)
result = clip_data(data, config)
Expand Down
2 changes: 1 addition & 1 deletion src/koheesio/exponential.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from pydantic import BaseModel

class ExpTransformConfig(BaseModel):
    """Configuration passed to ``exp_transform``."""

    # Base of the exponential function.
    base: float

def exp_transform(data: List[float], config: ExpTransformConfig) -> List[float]:
import math
Expand Down
4 changes: 0 additions & 4 deletions src/koheesio/exponential_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,25 +13,21 @@ def test_exp_transform_base_e(self):
data = [1, 2, 3]
config = ExpTransformConfig(base=2.71828)
result = exp_transform(data, config)
# Just check it works; e-based calculations vary
self.assertTrue(all(isinstance(x, float) for x in result))

def test_exp_transform_large_values(self):
    """With base 10 and large inputs, every element of the result is a float."""
    settings = ExpTransformConfig(base=10)
    transformed = exp_transform([10, 20], settings)
    for value in transformed:
        self.assertIsInstance(value, float)

def test_exp_transform_with_invalid_base(self):
    """A base of -1 is expected to make ``exp_transform`` raise ``ValueError``."""
    bad_config = ExpTransformConfig(base=-1)
    self.assertRaises(ValueError, exp_transform, [1, 2, 3], bad_config)

def test_exp_transform_empty_data(self):
"""Test exponential transformation with an empty data list."""
data = []
config = ExpTransformConfig(base=2)
result = exp_transform(data, config)
Expand Down
3 changes: 1 addition & 2 deletions src/koheesio/min_max.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
from typing import List

def min_max_normalize(data: List[float], new_min: float, new_max: float) -> List[float]:
    """Rescale ``data`` linearly onto the range ``[new_min, new_max]``.

    Args:
        data: Values to rescale; must not be empty.
        new_min: Value the smallest element of ``data`` is mapped to.
        new_max: Value the largest element of ``data`` is mapped to.

    Returns:
        A new list with one rescaled value per input element. When every
        element of ``data`` is equal, each output is ``new_min``.

    Raises:
        ValueError: If ``data`` is empty.
    """
    if not data:
        raise ValueError("Data list cannot be empty.")
    min_data = min(data)
    max_data = max(data)
    if min_data == max_data:
        # Zero spread would divide by zero below; map every value to new_min.
        return [new_min for _ in data]
    return [
        new_min + (x - min_data) * (new_max - new_min) / (max_data - min_data)
        for x in data
    ]
5 changes: 2 additions & 3 deletions src/koheesio/min_max_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def test_standard_case(self):
def test_identical_values(self):
    """Constant input maps every element to ``new_min``."""
    normalized = min_max_normalize([10, 10, 10], 0, 1)
    self.assertEqual(normalized, [0.0, 0.0, 0.0])

def test_negative_values(self):
Expand All @@ -25,10 +25,9 @@ def test_empty_data(self):
min_max_normalize([], 0, 1)

def test_negative_target_range(self):
    """Evenly spaced input maps onto an evenly spaced range ``[-5, -1]``."""
    normalized = min_max_normalize([10, 20, 30, 40, 50], -5, -1)
    self.assertEqual(normalized, [-5.0, -4.0, -3.0, -2.0, -1.0])

if __name__ == "__main__":
Expand Down
4 changes: 0 additions & 4 deletions src/koheesio/normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,13 @@ class NormalizeConfig(BaseModel):

def normalize(data, config: NormalizeConfig):

# Calculate the minimum and maximum values of the input data
min_data = min(data)
max_data = max(data)

# Calculate the range of the input data
range_data = max_data - min_data

# Calculate the range of the target normalization
range_config = config.max_value - config.min_value

# Normalize each value in the data
normalized_data = [
((value - min_data) / range_data) * range_config + config.min_value
for value in data
Expand Down
11 changes: 1 addition & 10 deletions src/koheesio/scale.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,7 @@
from typing import List

def scale_data(data: List[float], multiplier: float) -> List[float]:
"""
Scales a list of numbers by the given multiplier.
Args:
data (List[float]): A list of numerical data.
multiplier (float): The multiplier to scale each number.
Returns:
List[float]: A list of scaled numbers.
"""

if not isinstance(multiplier, (int, float)):
raise ValueError("Multiplier must be a numeric value.")
if not all(isinstance(num, (int, float)) for num in data):
Expand Down
5 changes: 0 additions & 5 deletions src/koheesio/scale_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,13 @@
class TestScaleData(unittest.TestCase):

def test_scale_positive_numbers(self):
    """A multiplier of 2 doubles each positive integer."""
    doubled = scale_data([1, 2, 3, 4, 5], 2)
    self.assertEqual(doubled, [2, 4, 6, 8, 10])

def test_scale_negative_numbers(self):
"""Test scaling a list of negative numbers."""
data = [-1, -2, -3, -4, -5]
multiplier = 3
result = scale_data(data, multiplier)
Expand All @@ -28,23 +26,20 @@ def test_scale_negative_numbers(self):
self.assertEqual(result, expected)

def test_scale_with_zero_multiplier(self):
    """A multiplier of zero turns every element into zero."""
    zeroed = scale_data([10, -10, 100], 0)
    self.assertEqual(zeroed, [0, 0, 0])

def test_scale_with_one_multiplier(self):
    """A multiplier of one returns values equal to the input."""
    values = [1.5, 2.5, 3.5]
    self.assertEqual(scale_data(values, 1), [1.5, 2.5, 3.5])

def test_scale_empty_list(self):
"""Test scaling an empty list."""
data = []
multiplier = 2
result = scale_data(data, multiplier)
Expand Down
4 changes: 2 additions & 2 deletions src/koheesio/standardisation.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
import statistics

class StandardizeConfig(BaseModel):
    """Optional precomputed statistics passed to ``standardize``."""

    # Optional precomputed mean; when left as None, ``standardize`` falls
    # back to ``statistics.mean(data)``.
    mean: float = None
    # Optional precomputed standard deviation.
    std_dev: float = None

def standardize(data: List[float], config: StandardizeConfig) -> List[float]:
mean = config.mean if config.mean is not None else statistics.mean(data)
Expand Down
6 changes: 1 addition & 5 deletions src/koheesio/standardize_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,35 +10,31 @@ def test_standardize_identical_values(self):
standardize(data, config)

def test_standardize_with_precomputed_values(self):
    """A supplied mean of 20 and std_dev of 10 give -1, 0, 1 for 10, 20, 30."""
    stats = StandardizeConfig(mean=20, std_dev=10)
    standardized = standardize([10, 20, 30], stats)
    self.assertEqual(standardized, [-1.0, 0.0, 1.0])

def test_standardize_empty_data(self):
    """Empty data with a default config is expected to raise ``ValueError``."""
    default_config = StandardizeConfig()
    self.assertRaises(ValueError, standardize, [], default_config)


def test_standardize_with_custom_mean_and_std_dev(self):
    """A supplied mean of 3 and std_dev of 1 shift 1..5 onto -2..2."""
    stats = StandardizeConfig(mean=3, std_dev=1)
    standardized = standardize([1, 2, 3, 4, 5], stats)
    self.assertEqual(standardized, [-2.0, -1.0, 0.0, 1.0, 2.0])

def test_standardize_with_small_variance(self):
    """Standardized values of a tightly clustered dataset sum to about zero."""
    tight_cluster = [1.001, 1.002, 1.003, 1.004, 1.005]
    standardized = standardize(tight_cluster, StandardizeConfig())
    total = sum(standardized)
    self.assertAlmostEqual(total, 0, places=5)

# Run this module's tests when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
6 changes: 0 additions & 6 deletions src/koheesio/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,44 +5,38 @@
class TestNormalize(unittest.TestCase):
    """Unit tests for ``normalize`` configured through ``NormalizeConfig``."""

    def test_normalize_standard(self):
        """Test normalization with standard positive integers."""
        data = [10, 20, 30, 40, 50]
        config = NormalizeConfig(min_value=0, max_value=1)
        result = normalize(data, config)
        expected = [0.0, 0.25, 0.5, 0.75, 1.0]
        self.assertEqual(result, expected)

    def test_normalize_negative_values(self):
        """Test normalization with negative values."""
        data = [-50, -25, 0, 25, 50]
        config = NormalizeConfig(min_value=-1, max_value=1)
        result = normalize(data, config)
        expected = [-1.0, -0.5, 0.0, 0.5, 1.0]
        self.assertEqual(result, expected)

    def test_normalize_float_values(self):
        """Test normalization with floating-point numbers."""
        data = [0.1, 0.2, 0.3, 0.4, 0.5]
        config = NormalizeConfig(min_value=0, max_value=1)
        result = normalize(data, config)
        expected = [0.0, 0.25, 0.5, 0.75, 1.0]
        # Decimal fractions such as 0.1 are not exactly representable, so
        # compare with a tolerance instead of pinning rounding artefacts.
        self.assertEqual(len(result), len(expected))
        for actual, wanted in zip(result, expected):
            self.assertAlmostEqual(actual, wanted, places=9)

    def test_normalize_inverted_config(self):
        """Test normalization when config min_value is greater than max_value."""
        data = [10, 20, 30, 40, 50]
        config = NormalizeConfig(min_value=1, max_value=0)
        result = normalize(data, config)
        expected = [1.0, 0.75, 0.5, 0.25, 0.0]
        self.assertEqual(result, expected)

    def test_normalize_empty_data(self):
        """Test normalization with an empty data list."""
        data = []
        config = NormalizeConfig(min_value=0, max_value=1)
        with self.assertRaises(ValueError):
            normalize(data, config)

# Run this module's tests when it is executed directly as a script.
if __name__ == '__main__':

    unittest.main()
3 changes: 1 addition & 2 deletions src/koheesio/z_score_normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,10 @@
import statistics

def z_score_normalize(data: List[float]) -> List[float]:
    """Normalize ``data`` using Z-Score normalization (mean = 0, std deviation = 1).

    The spread is measured with the sample standard deviation
    (``statistics.stdev``).

    Args:
        data: Values to normalize; must not be empty.

    Returns:
        A new list with one z-score per input element. When the data has no
        spread (a single value, or all values identical) every z-score is
        ``0.0``.

    Raises:
        ValueError: If ``data`` is empty.
    """
    if not data:
        raise ValueError("Data list cannot be empty.")
    if len(data) < 2:
        # statistics.stdev requires at least two points; a lone value has no
        # spread, so treat it like the identical-values case below.
        return [0.0 for _ in data]
    mean = statistics.mean(data)
    std_dev = statistics.stdev(data)
    if std_dev == 0:
        # All values are identical; z-score is 0.
        return [0.0 for _ in data]
    return [(x - mean) / std_dev for x in data]
8 changes: 1 addition & 7 deletions src/koheesio/z_score_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,18 @@
class TestZScoreNormalize(unittest.TestCase):

def test_large_numbers(self):
    """Test z-score normalization with large numbers."""
    data = [1e10, 2e10, 3e10, 4e10, 5e10]
    result = z_score_normalize(data)
    expected = [-1.2649110640673518, -0.6324555320336759, 0.0, 0.6324555320336759, 1.2649110640673518]
    # assertAlmostEqual cannot subtract two lists (TypeError unless they are
    # exactly equal), so compare element by element.
    self.assertEqual(len(result), len(expected))
    for actual, wanted in zip(result, expected):
        self.assertAlmostEqual(actual, wanted, places=5)

def test_mixed_positive_and_negative_values(self):
    """Test z-score normalization with a mix of positive and negative values."""
    data = [-10, -5, 0, 5, 10]
    result = z_score_normalize(data)
    expected = [-1.2649110640673518, -0.6324555320336759, 0.0, 0.6324555320336759, 1.2649110640673518]
    # assertAlmostEqual cannot subtract two lists (TypeError unless they are
    # exactly equal), so compare element by element.
    self.assertEqual(len(result), len(expected))
    for actual, wanted in zip(result, expected):
        self.assertAlmostEqual(actual, wanted, places=5)

def test_precomputed_mean_and_std_dev(self):
"""Test z-score normalization with precomputed mean and std deviation."""
from statistics import mean, stdev
data = [10, 20, 30, 40, 50]
mean_value = mean(data)
Expand All @@ -29,18 +26,15 @@ def test_precomputed_mean_and_std_dev(self):


def test_data_with_highly_skewed_distribution(self):
    """The outlier's z-score exceeds 1 and the smallest value's is negative."""
    scores = z_score_normalize([1, 2, 3, 4, 1000])
    # The single large value should sit well above the mean ...
    self.assertGreater(scores[-1], 1.0)
    # ... while the smallest value lands below it.
    self.assertLess(scores[0], 0.0)

def test_already_normalized_data(self):
    """Test z-score normalization on already normalized data."""
    data = [-1.2649110640673518, -0.6324555320336759, 0.0, 0.6324555320336759, 1.2649110640673518]
    result = z_score_normalize(data)
    # Z-score normalizing already normalized data should not change it.
    expected = data
    # assertAlmostEqual cannot subtract two lists (TypeError unless they are
    # exactly equal), so compare element by element.
    self.assertEqual(len(result), len(expected))
    for actual, wanted in zip(result, expected):
        self.assertAlmostEqual(actual, wanted, places=5)

if __name__ == "__main__":
Expand Down

0 comments on commit 8b5fbd6

Please sign in to comment.