Skip to content

Commit

Permalink
Add lowest risk
Browse files Browse the repository at this point in the history
  • Loading branch information
prasser committed Jan 2, 2025
1 parent ea5ea48 commit f5db199
Showing 1 changed file with 41 additions and 9 deletions.
50 changes: 41 additions & 9 deletions src/main/org/deidentifier/arx/risk/RiskModelSampleSummary.java
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,10 @@ public static class JournalistRisk extends RiskSummary {
* @param rA
* @param rB
* @param rC
* @param rM
*/
protected JournalistRisk(double t, double rA, double rB, double rC) {
super(t, rA, rB, rC);
protected JournalistRisk(double t, double rA, double rB, double rC, double rM) {
super(t, rA, rB, rC, rM);
}
}

Expand Down Expand Up @@ -94,9 +95,10 @@ public static class ProsecutorRisk extends RiskSummary {
* @param rA
* @param rB
* @param rC
* @param rM
*/
protected ProsecutorRisk(double t,double rA, double rB, double rC) {
super(t, rA, rB, rC);
protected ProsecutorRisk(double t,double rA, double rB, double rC, double rM) {
super(t, rA, rB, rC, rM);
}
}
/**
Expand All @@ -112,6 +114,8 @@ public static class RiskSummary {
private final double rA;
/** Maximum probability of re-identification*/
private final double rB;
/** Minimum probability of re-identification*/
private final double rM;
/** Proportion of records that can be re-identified on average*/
private final double rC;

Expand All @@ -121,12 +125,14 @@ public static class RiskSummary {
* @param rA
* @param rB
* @param rC
* @param rM
*/
protected RiskSummary(double t, double rA, double rB, double rC) {
protected RiskSummary(double t, double rA, double rB, double rC, double rM) {
this.t = t;
this.rA = rA;
this.rB = rB;
this.rC = rC;
this.rM = rM;
}

/**
Expand All @@ -152,6 +158,14 @@ public double getEffectiveRiskThreshold() {
public double getHighestRisk() {
    // A NaN value for rB is reported as zero risk
    if (Double.isNaN(rB)) {
        return 0d;
    }
    return rB;
}

/**
 * Returns the minimum probability of re-identification.
 * @return the stored minimum risk (rM), or 0 if that value is NaN
 */
public double getLowestRisk() {
    if (Double.isNaN(rM)) {
        return 0d;
    }
    return rM;
}

/**
* Proportion of records with risk above threshold
Expand Down Expand Up @@ -219,8 +233,8 @@ public RiskModelSampleSummary(DataHandleInternal handle,
population = sample;
}
if (sample.size() == 0) {
this.prosecutorRisk = new ProsecutorRisk(threshold, 0d, 0d, 0d);
this.journalistRisk = new JournalistRisk(threshold, 0d, 0d, 0d);
this.prosecutorRisk = new ProsecutorRisk(threshold, 0d, 0d, 0d, 0d);
this.journalistRisk = new JournalistRisk(threshold, 0d, 0d, 0d, 0d);
this.marketerRisk = new MarketerRisk(0d);
} else {
this.prosecutorRisk = getProsecutorRisk(population, sample, 0.9d, stop, progress);
Expand Down Expand Up @@ -359,12 +373,14 @@ private JournalistRisk getJournalistRisk(Groupify<TupleWrapper> population,
// Init
double rA = 0d;
double rB = 0d;
double rM = 0d;
double rC = 0d;
double rC1 = 0d;
double rC2 = 0d;
double numRecordsInSample = 0d;
double numClassesInSample = 0d;
double smallestClassSizeInPopulation = Integer.MAX_VALUE;
double largestClassSizeInPopulation = Integer.MIN_VALUE;
int maxindex = sample.size();
int index = 0;

Expand Down Expand Up @@ -393,6 +409,10 @@ private JournalistRisk getJournalistRisk(Groupify<TupleWrapper> population,
if (groupSizeInPopulation < smallestClassSizeInPopulation) {
smallestClassSizeInPopulation = groupSizeInPopulation;
}
// Compute rM
if (groupSizeInPopulation > largestClassSizeInPopulation) {
largestClassSizeInPopulation = groupSizeInPopulation;
}
// Compute rC
numClassesInSample++;
numRecordsInSample += groupSizeInSample;
Expand All @@ -411,14 +431,17 @@ private JournalistRisk getJournalistRisk(Groupify<TupleWrapper> population,

// Compute rB: smallest class is first class in the histogram
rB = 1d / smallestClassSizeInPopulation;

// Compute rM
rM = 1d / largestClassSizeInPopulation;

// Compute rC
rC1 = numClassesInSample / rC1;
rC2 = rC2 / numRecordsInSample;
rC = Math.max(rC1, rC2);

// Return
return new JournalistRisk(threshold, rA, rB, rC);
return new JournalistRisk(threshold, rA, rB, rC, rM);
}

/**
Expand Down Expand Up @@ -495,10 +518,12 @@ private ProsecutorRisk getProsecutorRisk(Groupify<TupleWrapper> population,
// Init
double rA = 0d;
double rB = 0d;
double rM = 0d;
double rC = 0d;
double numRecords = 0d;
double numClasses = 0d;
double smallestClassSize = Integer.MAX_VALUE;
double largestClassSize = Integer.MIN_VALUE;
int maxindex = sample.size();
int index = 0;

Expand All @@ -521,6 +546,10 @@ private ProsecutorRisk getProsecutorRisk(Groupify<TupleWrapper> population,
if (groupSize < smallestClassSize) {
smallestClassSize = groupSize;
}
// Compute rM: track the largest class size seen so far. The comparison must be
// '>' because largestClassSize starts at Integer.MIN_VALUE; with '<' it would
// never be updated and rM (1 / largestClassSize) would come out negative.
if (groupSize > largestClassSize) {
    largestClassSize = groupSize;
}
// Compute rC
numClasses++;
numRecords += groupSize;
Expand All @@ -538,10 +567,13 @@ private ProsecutorRisk getProsecutorRisk(Groupify<TupleWrapper> population,
// Compute rB: smallest class is first class in the histogram
rB = 1d / smallestClassSize;

// Compute rM: largest class yields the lowest risk
rM = 1d / largestClassSize;

// Compute rC
rC = numClasses / numRecords;

// Return
return new ProsecutorRisk(threshold, rA, rB, rC);
return new ProsecutorRisk(threshold, rA, rB, rC, rM);
}
}

0 comments on commit f5db199

Please sign in to comment.