Skip to content

Commit

Permalink
Merge pull request #7 from IFCA-Advanced-Computing/develop
Browse files Browse the repository at this point in the history
Update docs and add new tests
  • Loading branch information
judithspd authored May 15, 2024
2 parents fa80cd9 + 24ab9e4 commit 5a2c783
Show file tree
Hide file tree
Showing 5 changed files with 100 additions and 7 deletions.
7 changes: 6 additions & 1 deletion CITATION.cff
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,10 @@ authors:
orcid: "https://orcid.org/0000-0002-0013-4602"
title: "ANJANA"
version: 0.2.1
date-released: 2024-04-18
date-released: 2024-05-13
url: "https://github.com/IFCA-Advanced-Computing/anjana"
identifiers:
- type: doi
value: 10.5281/zenodo.11186382


23 changes: 23 additions & 0 deletions docs/source/getting_started.rst
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,29 @@ Then, in order to create the hierarchies we can define the following dictionary:
1: np.array(["*"] * len(data["city"].values))} # Suppression
}
In addition, we can also use the function ``generate_intervals()`` from ``utils`` to create the interval-based hierarchy as follows:

.. code-block:: python
import numpy as np
from anjana.anonymity import utils
age = data['age'].values
hierarchies = {
"age": {
0: data["age"].values,
1: utils.generate_intervals(data["age"].values, 0, 100, 5),
2: utils.generate_intervals(data["age"].values, 0, 100, 10),
},
"gender": {
0: data["gender"].values,
1: np.array(["*"] * len(data["gender"].values)) # Suppression
},
"city": {0: data["city"].values,
1: np.array(["*"] * len(data["city"].values))} # Suppression
}
.. _adult dataset: https://archive.ics.uci.edu/ml/datasets/adult
.. _examples folder of the repository: https://gitlab.ifca.es/privacy-security/siesta-anonymity/-/tree/main/examples
Expand Down
4 changes: 2 additions & 2 deletions docs/source/index.rst
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
ANJANA
=============================================================================

|License| |codecov| |DOI| |PyPI| |Downloads| |Documentation Status|
|License| |codecov| |DOI| |Downloads| |Documentation Status|
|release-please| |Publish Package in PyPI| |CI/CD Pipeline| |Code Coverage|
|Python version|
|Python version| |PyPI|

ANJANA is a `Python`_ library which allows the application of different anonymity
techniques based on a set of identifiers, quasi-identifiers (QI) and a sensitive
Expand Down
2 changes: 1 addition & 1 deletion examples/hospital.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

import numpy as np
import pandas as pd
from anjana.anonymity import k_anonymity, l_diversity, utils
from anjana.anonymity import k_anonymity, l_diversity, utils, basic_beta_likeness

data = pd.read_csv("data/hospital_extended.csv")

Expand Down
71 changes: 68 additions & 3 deletions tests/test_anonymity.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,21 @@ def test_entropy_l(self):
)
assert len(data_anon) == 0

def test_entropy_l1(self):
    """Anonymize with the value 1 and check the entropy l-diversity is 1.

    The anonymized output is measured with pycanon's
    ``entropy_l_diversity`` and compared against 1.
    """
    # Sixth positional argument; presumably the requested l value
    # (TODO confirm against the entropy_l_diversity signature).
    requested_l = 1
    anonymized = anonymity.entropy_l_diversity(
        self.data,
        self.ident,
        self.quasi_ident,
        self.sens_att,
        self.k,
        requested_l,
        self.supp_level,
        self.hierarchies,
    )
    sensitive_attrs = [self.sens_att]
    measured_l = pycanon.anonymity.entropy_l_diversity(
        anonymized, self.quasi_ident, sensitive_attrs
    )
    assert 1 == measured_l

def test_rec_c_l(self):
data_anon = anonymity.recursive_c_l_diversity(
self.data,
Expand Down Expand Up @@ -365,7 +380,11 @@ class TestHospital:
l_div = 2
supp_level = 0
hierarchies = {
"age": dict(pd.read_csv("./examples/hierarchies/age.csv", header=None)),
"age": {
0: data["age"].values,
1: utils.generate_intervals(data["age"].values, 0, 100, 5),
2: utils.generate_intervals(data["age"].values, 0, 100, 10),
},
"gender": {
0: data["gender"].values,
1: np.array(["*"] * len(data["gender"].values)),
Expand All @@ -392,6 +411,18 @@ def test_k_anon(self):
data_anon_real["age"] = hierarchy_age[2].values[pos]
assert data_anon_real.equals(data_anon)

def test_k_anon_big(self):
    """Check that k-anonymity called with the value 30 yields an empty frame.

    The result must compare equal to a freshly built, empty
    ``pd.DataFrame()``.
    """
    # Fourth positional argument; presumably the k value, as the test
    # name suggests (TODO confirm against the k_anonymity signature).
    big_k = 30
    anonymized = anonymity.k_anonymity(
        self.data,
        self.ident,
        self.quasi_ident,
        big_k,
        self.supp_level,
        self.hierarchies,
    )
    empty_frame = pd.DataFrame()
    assert anonymized.equals(empty_frame)

def test_l_div(self):
data_anon = anonymity.l_diversity(
self.data,
Expand All @@ -414,6 +445,32 @@ def test_l_div(self):
data_anon_real["city"] = "*"
assert data_anon_real.equals(data_anon)

def test_basic_beta0_supp0(self):
    """Check basic beta-likeness with two zero arguments gives an empty frame.

    The result must compare equal to a freshly built, empty
    ``pd.DataFrame()``.
    """
    # Sixth and seventh positional arguments, both 0; presumably beta and
    # the suppression level, going by the test name (TODO confirm).
    beta = 0
    supp_level = 0
    anonymized = anonymity.basic_beta_likeness(
        self.data,
        self.ident,
        self.quasi_ident,
        self.sens_att,
        self.k,
        beta,
        supp_level,
        self.hierarchies,
    )
    empty_frame = pd.DataFrame()
    assert anonymized.equals(empty_frame)

def test_enhanced_beta0_supp0(self):
    """Check enhanced beta-likeness with two zero arguments gives an empty frame.

    The result must compare equal to a freshly built, empty
    ``pd.DataFrame()``.
    """
    # Sixth and seventh positional arguments, both 0; presumably beta and
    # the suppression level, going by the test name (TODO confirm).
    beta = 0
    supp_level = 0
    anonymized = anonymity.enhanced_beta_likeness(
        self.data,
        self.ident,
        self.quasi_ident,
        self.sens_att,
        self.k,
        beta,
        supp_level,
        self.hierarchies,
    )
    empty_frame = pd.DataFrame()
    assert anonymized.equals(empty_frame)

def test_get_transformation(self):
data_anon = anonymity.k_anonymity(
self.data,
Expand All @@ -431,7 +488,15 @@ def test_get_transformation(self):

def test_get_transformation_2qi(self):
hierarchies = {
"age": dict(pd.read_csv("./examples/hierarchies/age.csv", header=None)),
"age": {
0: self.data["age"].values,
1: utils.generate_intervals(
self.data["age"].values, 0, 100, 5
),
2: utils.generate_intervals(
self.data["age"].values, 0, 100, 10
),
},
"city": {
0: self.data["city"].values,
1: np.array(["*"] * len(self.data["city"].values)),
Expand All @@ -447,7 +512,7 @@ def test_get_transformation_2qi(self):
)

transformation = utils.get_transformation(
data_anon, self.quasi_ident, self.hierarchies
data_anon, self.quasi_ident, hierarchies
)
assert [2, 0, 0] == transformation

Expand Down

0 comments on commit 5a2c783

Please sign in to comment.