Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update docs and add new tests #7

Merged
merged 4 commits into from
May 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion CITATION.cff
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,10 @@ authors:
orcid: "https://orcid.org/0000-0002-0013-4602"
title: "ANJANA"
version: 0.2.1
date-released: 2024-04-18
date-released: 2024-05-13
url: "https://github.com/IFCA-Advanced-Computing/anjana"
identifiers:
- type: doi
value: 10.5281/zenodo.11186382


23 changes: 23 additions & 0 deletions docs/source/getting_started.rst
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,29 @@ Then, in order to create the hierarchies we can define the following dictionary:
1: np.array(["*"] * len(data["city"].values))} # Suppression
}
In addition, we can also use the function ``generate_intervals()`` from ``utils`` to create the interval-based hierarchy as follows:

.. code-block:: python
import numpy as np
from anjana.anonymity import utils
age = data['age'].values
hierarchies = {
"age": {
0: data["age"].values,
1: utils.generate_intervals(data["age"].values, 0, 100, 5),
2: utils.generate_intervals(data["age"].values, 0, 100, 10),
},
"gender": {
0: data["gender"].values,
1: np.array(["*"] * len(data["gender"].values)) # Suppression
},
"city": {0: data["city"].values,
1: np.array(["*"] * len(data["city"].values))} # Suppression
}
.. _adult dataset: https://archive.ics.uci.edu/ml/datasets/adult
.. _examples folder of the repository: https://gitlab.ifca.es/privacy-security/siesta-anonymity/-/tree/main/examples
Expand Down
4 changes: 2 additions & 2 deletions docs/source/index.rst
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
ANJANA
=============================================================================

|License| |codecov| |DOI| |PyPI| |Downloads| |Documentation Status|
|License| |codecov| |DOI| |Downloads| |Documentation Status|
|release-please| |Publish Package in PyPI| |CI/CD Pipeline| |Code Coverage|
|Python version|
|Python version| |PyPI|

ANJANA is a `Python`_ library which allows the application of different anonymity
techniques based on a set of identifiers, quasi-identifiers (QI) and a sensitive
Expand Down
2 changes: 1 addition & 1 deletion examples/hospital.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

import numpy as np
import pandas as pd
from anjana.anonymity import k_anonymity, l_diversity, utils
from anjana.anonymity import k_anonymity, l_diversity, utils, basic_beta_likeness

data = pd.read_csv("data/hospital_extended.csv")

Expand Down
71 changes: 68 additions & 3 deletions tests/test_anonymity.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,21 @@ def test_entropy_l(self):
)
assert len(data_anon) == 0

def test_entropy_l1(self):
data_anon = anonymity.entropy_l_diversity(
self.data,
self.ident,
self.quasi_ident,
self.sens_att,
self.k,
1,
self.supp_level,
self.hierarchies,
)
assert 1 == pycanon.anonymity.entropy_l_diversity(
data_anon, self.quasi_ident, [self.sens_att]
)

def test_rec_c_l(self):
data_anon = anonymity.recursive_c_l_diversity(
self.data,
Expand Down Expand Up @@ -365,7 +380,11 @@ class TestHospital:
l_div = 2
supp_level = 0
hierarchies = {
"age": dict(pd.read_csv("./examples/hierarchies/age.csv", header=None)),
"age": {
0: data["age"].values,
1: utils.generate_intervals(data["age"].values, 0, 100, 5),
2: utils.generate_intervals(data["age"].values, 0, 100, 10),
},
"gender": {
0: data["gender"].values,
1: np.array(["*"] * len(data["gender"].values)),
Expand All @@ -392,6 +411,18 @@ def test_k_anon(self):
data_anon_real["age"] = hierarchy_age[2].values[pos]
assert data_anon_real.equals(data_anon)

def test_k_anon_big(self):
data_anon = anonymity.k_anonymity(
self.data,
self.ident,
self.quasi_ident,
30,
self.supp_level,
self.hierarchies,
)

assert data_anon.equals(pd.DataFrame())

def test_l_div(self):
data_anon = anonymity.l_diversity(
self.data,
Expand All @@ -414,6 +445,32 @@ def test_l_div(self):
data_anon_real["city"] = "*"
assert data_anon_real.equals(data_anon)

def test_basic_beta0_supp0(self):
data_anon = anonymity.basic_beta_likeness(
self.data,
self.ident,
self.quasi_ident,
self.sens_att,
self.k,
0,
0,
self.hierarchies,
)
assert data_anon.equals(pd.DataFrame())

def test_enhanced_beta0_supp0(self):
data_anon = anonymity.enhanced_beta_likeness(
self.data,
self.ident,
self.quasi_ident,
self.sens_att,
self.k,
0,
0,
self.hierarchies,
)
assert data_anon.equals(pd.DataFrame())

def test_get_transformation(self):
data_anon = anonymity.k_anonymity(
self.data,
Expand All @@ -431,7 +488,15 @@ def test_get_transformation(self):

def test_get_transformation_2qi(self):
hierarchies = {
"age": dict(pd.read_csv("./examples/hierarchies/age.csv", header=None)),
"age": {
0: self.data["age"].values,
1: utils.generate_intervals(
self.data["age"].values, 0, 100, 5
),
2: utils.generate_intervals(
self.data["age"].values, 0, 100, 10
),
},
"city": {
0: self.data["city"].values,
1: np.array(["*"] * len(self.data["city"].values)),
Expand All @@ -447,7 +512,7 @@ def test_get_transformation_2qi(self):
)

transformation = utils.get_transformation(
data_anon, self.quasi_ident, self.hierarchies
data_anon, self.quasi_ident, hierarchies
)
assert [2, 0, 0] == transformation

Expand Down
Loading