Skip to content

Commit

Permalink
Implement additional method for optimizing output data
Browse files Browse the repository at this point in the history
  • Loading branch information
prasser committed Jun 12, 2017
1 parent c5384fe commit 0fee0b9
Show file tree
Hide file tree
Showing 2 changed files with 168 additions and 10 deletions.
4 changes: 3 additions & 1 deletion src/main/org/deidentifier/arx/ARXConfiguration.java
Original file line number Diff line number Diff line change
Expand Up @@ -1222,7 +1222,9 @@ protected ARXConfiguration getInstanceForLocalRecoding(RowSet rowset, double gsF
result.aCriteria = null;
result.criteria = criteria;
MetricConfiguration utilityConfig = result.getQualityModel().getConfiguration();
utilityConfig.setGsFactor(gsFactor);
if (!Double.isNaN(gsFactor)) {
utilityConfig.setGsFactor(gsFactor);
}
result.metric = result.getQualityModel().getDescription().createInstance(utilityConfig);

// Return
Expand Down
174 changes: 165 additions & 9 deletions src/main/org/deidentifier/arx/ARXResult.java
Original file line number Diff line number Diff line change
Expand Up @@ -489,10 +489,57 @@ public void progress(double progress) {
}
});
}

/**
* This method optimizes the given data output with local recoding to improve its utility
* @param handle
* @param gsFactor A factor [0,1] weighting generalization and suppression.
* The default value is 0.5, which means that generalization
* and suppression will be treated equally. A factor of 0
* will favor suppression, and a factor of 1 will favor
* generalization. The values in between can be used for
* balancing both methods.
* @param listener
* @return The number of optimized records
*/
public int optimize(DataHandle handle, double gsFactor, ARXListener listener) throws RollbackRequiredException {
return optimizeFast(handle, gsFactor, Double.NaN, listener);
}

/**
* This method optimizes the given data output with local recoding to improve its utility
* @param handle
* @param records A fraction [0,1] of records that need to be optimized.
* @return The number of optimized records
*/
public int optimizeFast(DataHandle handle,
double records) throws RollbackRequiredException {
return optimizeFast(handle, records, Double.NaN, new ARXListener(){
@Override
public void progress(double progress) {
// Empty by design
}
});
}

/**
* This method optimizes the given data output with local recoding to improve its utility
* @param handle
* @param records A fraction [0,1] of records that need to be optimized.
* @param listener
* @return The number of optimized records
*/
public int optimizeFast(DataHandle handle,
double records,
ARXListener listener) throws RollbackRequiredException {
return optimizeFast(handle, records, Double.NaN, listener);
}


/**
* This method optimizes the given data output with local recoding to improve its utility
* @param handle
* @param records A fraction [0,1] of records that need to be optimized.
* @param gsFactor A factor [0,1] weighting generalization and suppression.
* The default value is 0.5, which means that generalization
* and suppression will be treated equally. A factor of 0
Expand All @@ -502,7 +549,10 @@ public void progress(double progress) {
* @param listener
* @return The number of optimized records
*/
public int optimize(DataHandle handle, double gsFactor, ARXListener listener) throws RollbackRequiredException {
public int optimizeFast(DataHandle handle,
double records,
double gsFactor,
ARXListener listener) throws RollbackRequiredException {

// Check if null
if (listener == null) {
Expand All @@ -515,7 +565,12 @@ public int optimize(DataHandle handle, double gsFactor, ARXListener listener) th
}

// Check bounds
if (gsFactor < 0d || gsFactor > 1d) {
if (!Double.isNaN(records) && (records < 0d || records > 1d)) {
throw new IllegalArgumentException("Number of records to optimize must be in [0, 1]");
}

// Check bounds
if (!Double.isNaN(gsFactor) && (gsFactor < 0d || gsFactor > 1d)) {
throw new IllegalArgumentException("Generalization/suppression factor must be in [0, 1]");
}

Expand Down Expand Up @@ -555,7 +610,14 @@ public int optimize(DataHandle handle, double gsFactor, ARXListener listener) th
// - Subsets will be projected accordingly
// - Utility measures will be cloned
ARXConfiguration config = this.config.getInstanceForLocalRecoding(rowset, gsFactor);

if (!Double.isNaN(records)) {
double absoluteRecords = records * handle.getNumRows();
double relativeRecords = absoluteRecords / (double)rowset.size();
relativeRecords = relativeRecords < 0d ? 0d : relativeRecords;
relativeRecords = relativeRecords > 1d ? 1d : relativeRecords;
config.setMaxOutliers(1d - relativeRecords);
}

// In the data definition, only microaggregation functions maintain a state, but these
// are cloned, when cloning the definition
// TODO: This is probably not necessary, because they are used from the data manager,
Expand Down Expand Up @@ -641,7 +703,6 @@ public int optimize(DataHandle handle, double gsFactor, ARXListener listener) th
* balancing both methods.
* @param maxIterations The maximal number of iterations to perform
* @param adaptionFactor Is added to the gsFactor when reaching a fixpoint
* @param listener
* @throws RollbackRequiredException
*/
public void optimizeIterative(DataHandle handle,
Expand Down Expand Up @@ -686,13 +747,19 @@ public void optimizeIterative(final DataHandle handle,
throw new IllegalArgumentException("Max. iterations must be > zero");
}

// Prepare
int iterationsTotal = 0;
int optimizedCurrent = Integer.MAX_VALUE;
int optimizedTotal = 0;
int optimizedGoal = 0;
for (int row = 0; row < handle.getNumRows(); row++) {
optimizedGoal += handle.isOutlier(row) ? 1 : 0;
}

// Progress
listener.progress(0d);

// Outer loop
int optimizedTotal = 0;
int iterationsTotal = 0;
int optimizedCurrent = Integer.MAX_VALUE;
while (isOptimizable(handle) && iterationsTotal < maxIterations && optimizedCurrent > 0) {

// Perform individual optimization
Expand All @@ -711,14 +778,103 @@ public void optimizeIterative(final DataHandle handle,
iterationsTotal++;

// Progress
double progress1 = (double)optimizedTotal / (double)handle.getNumRows();
double progress1 = (double)optimizedTotal / (double)optimizedGoal;
double progress2 = (double)iterationsTotal / (double)maxIterations;
listener.progress(Math.max(progress1, progress2));
}

// Progress
listener.progress(1d);
}

/**
* This method optimizes the given data output with local recoding to improve its utility
* @param handle
* @param records A fraction [0,1] of records that need to be optimized in each step.
* @throws RollbackRequiredException
*/
public void optimizeIterativeFast(DataHandle handle,
double records) throws RollbackRequiredException {
this.optimizeIterativeFast(handle, records, Double.NaN, new ARXListener(){
@Override
public void progress(double progress) {
// Empty by design
}
});
}

/**
* This method optimizes the given data output with local recoding to improve its utility
* @param handle
* @param records A fraction [0,1] of records that need to be optimized in each step.
* @param listener
* @throws RollbackRequiredException
*/
public void optimizeIterativeFast(DataHandle handle,
double records,
ARXListener listener) throws RollbackRequiredException {
this.optimizeIterativeFast(handle, records, Double.NaN, listener);
}

/**
* This method optimizes the given data output with local recoding to improve its utility
* @param handle
* @param records A fraction [0,1] of records that need to be optimized in each step.
* @param gsFactor A factor [0,1] weighting generalization and suppression.
* The default value is 0.5, which means that generalization
* and suppression will be treated equally. A factor of 0
* will favor suppression, and a factor of 1 will favor
* generalization. The values in between can be used for
* balancing both methods.
* @param listener
* @throws RollbackRequiredException
*/
public void optimizeIterativeFast(final DataHandle handle,
double records,
double gsFactor,
final ARXListener listener) throws RollbackRequiredException {

if (!Double.isNaN(gsFactor) && (gsFactor < 0d || gsFactor > 1d)) {
throw new IllegalArgumentException("Generalization/suppression factor must be in [0, 1]");
}
if (records < 0d || records > 1d) {
throw new IllegalArgumentException("Number of records to optimize must be in [0, 1]");
}

// Prepare
int optimizedCurrent = Integer.MAX_VALUE;
int optimizedTotal = 0;
int optimizedGoal = 0;
for (int row = 0; row < handle.getNumRows(); row++) {
optimizedGoal += handle.isOutlier(row) ? 1 : 0;
}

// Progress
listener.progress(0d);

// Outer loop
while (isOptimizable(handle) && optimizedCurrent > 0) {

// Progress
final double minProgress = (double)optimizedTotal / (double)optimizedGoal;
final double maxProgress = minProgress + records;

// Perform individual optimization
optimizedCurrent = optimizeFast(handle, records, gsFactor, new ARXListener() {
@Override
public void progress(double progress) {
listener.progress(minProgress + progress * (maxProgress - minProgress));
}
});
optimizedTotal += optimizedCurrent;

// Progress
listener.progress((double)optimizedTotal / (double)optimizedGoal);
}

// Progress
listener.progress(1d);
}

/**
* Returns a map of all microaggregation functions
Expand Down

0 comments on commit 0fee0b9

Please sign in to comment.