Commit 7c403eca authored by Fabrizio Detassis's avatar Fabrizio Detassis
Browse files

Added fairness constraints to OrTools solver and new CP-SAT solver implementation

parent 2851903d
......@@ -105,13 +105,14 @@ class BalanceConstraint(AbstractConstraint):
class EqualOpportunity(AbstractConstraint):
def __init__(self, name, pfeat, value):
def __init__(self, name, y, pfeat, value):
self.name = name
self.y = y
self.pfeat = pfeat
self.value = value
def is_satisfied(self, x, y):
value = utils.equal_opportunity(x, y, self.pfeat)
value = utils.equal_opportunity(x, self.y, y, self.pfeat)
return value <= self.value
def __str__(self):
......@@ -120,13 +121,14 @@ class EqualOpportunity(AbstractConstraint):
class EqualizedOdds(AbstractConstraint):
def __init__(self, name, pfeat, value):
def __init__(self, name, y, pfeat, value):
self.name = name
self.y = y
self.pfeat = pfeat
self.value = value
def is_satisfied(self, x, y):
value = utils.equalized_odds(x, y, self.pfeat)
value = utils.equalized_odds(x, self.y, y, self.pfeat)
return value <= self.value
def __str__(self):
......
from moving_target_cplex import MovingTargetClsCplex, MovingTargetRegCplex
from moving_target_smt import MovingTargetClsSMT, MovingTargetRegSMT
from moving_target_ortools import MovingTargetRegOT, MovingTargetClsOT
from moving_target_ortools import MovingTargetRegOT, MovingTargetClsOT, MovingTargetClsOTCP
def get_problem(problem_type, solver='cplex'):
......@@ -11,6 +11,8 @@ def get_problem(problem_type, solver='cplex'):
return MovingTargetClsSMT
elif solver == 'ortools':
return MovingTargetClsOT
elif solver == 'ortools-cp':
return MovingTargetClsOTCP
else:
raise ValueError("Solver " + str(solver) + " not recognized!")
......
......@@ -18,7 +18,9 @@ class MovingTarget(ABC):
self.beta = cfg[Symbolic("beta")].real_value()
self.test_data = test_data
self.y_k_history = []
self.z_k_history = []
self.y_k_test_history = []
self.z_k_test_history = []
def apply(self, args):
ml_problem = args
......@@ -29,7 +31,7 @@ class MovingTarget(ABC):
# Initial step.
M = self.initialize_ext(d)
self.add_constraints(M, C, d[0], d[1])
self.add_constraints(M, C, *d)
# self.set_loss_function(M, L, d)
# d_data = self.get_pysmt_data(y_k)
......@@ -37,7 +39,6 @@ class MovingTarget(ABC):
y_k = np.array(y_k.unpack())
for i in range(0, self.n):
self.y_k_history.append(y_k)
if self.test_data is not None:
self.y_k_test_history.append(self.ML.predict(self.test_data))
......@@ -50,8 +51,11 @@ class MovingTarget(ABC):
z_k = self.solve_ext(M)
ml_problem = self.assemble_ml_problem(ml_problem, z_k)
y_k = self.ML.apply(ml_problem)
y_k = np.array(y_k.unpack())
self.y_k_history.append(y_k)
self.z_k_history.append(z_k)
y_k = np.array(y_k.unpack())
return y_k
@abstractmethod
......
import numpy as np
from fractions import Fraction
from decimal import Decimal
from moving_target_abc import MovingTarget
from ortools.linear_solver.pywraplp import Solver
from constraint import InequalityRegGlobalConstraint, BalanceConstraint
from constraint import FairnessRegConstraint, FairnessClsConstraint
from ortools.sat.python import cp_model
from ortools.sat.python.cp_model import CpModel, CpSolver
from ortools.sat.python.cp_model import LinearExpr
from constraint import (
InequalityRegGlobalConstraint,
BalanceConstraint,
FairnessRegConstraint,
FairnessClsConstraint,
EqualOpportunity,
EqualizedOdds
)
class MovingTargetRegOT(MovingTarget):
......@@ -223,19 +235,19 @@ class MovingTargetClsOT(MovingTarget):
('fairness', y, 1.0),
]
"""
for c in C:
ctype = c[0]
for ct in C:
ctype = ct[0]
# Translate the constraint to SMT language.
x = [
[M.LookupVariable("y_%d_%d" % (j, i)) for i in range(self.n_classes)]
for j in range(self.n_points)
[M.LookupVariable("y_%d_%d" % (i, c)) for c in range(self.n_classes)]
for i in range(self.n_points)
]
# Store the constraint in the class.
if ctype == 'balance':
cvar = c[1]
cval = c[2]
cvar = ct[1]
cval = ct[2]
cstr = BalanceConstraint('ct', cval)
B = int(np.ceil(self.n_points / self.n_classes))
for c in range(self.n_classes):
......@@ -243,9 +255,9 @@ class MovingTargetClsOT(MovingTarget):
M.Add(xpr <= B + cval)
elif ctype == 'didi-bin':
cvar = c[1]
pfeat = c[2]
cval = c[3]
cvar = ct[1]
pfeat = ct[2]
cval = ct[3]
cstr = FairnessClsConstraint('ct', pfeat, cval)
new_vars = ["abs_%d" % i for i in range(len(np.unique(x_s[:, pfeat])) * self.n_classes)]
for v in new_vars:
......@@ -275,6 +287,105 @@ class MovingTargetClsOT(MovingTarget):
constraint = M.Sum(abs_val)
M.Add(constraint <= cval)
elif ctype == 'equal-opportunity':
if self.n_classes > 2:
raise ValueError("Constraint 'equal-opportunity' is meant for binary classification!")
cvar = ct[1]
pfeat = ct[2]
cval = ct[3]
cstr = EqualOpportunity('ct', self.y, pfeat, cval)
# Additional support variables
new_vars = ["tpr_%d" % i for i in range(len(pfeat))]
for v in new_vars:
if v not in self.supp_variables:
M.NumVar(lb=0, ub=M.infinity(), name=v)
self.supp_variables.add(v)
tpr = [M.LookupVariable(v) for v in new_vars]
# Max / Min support variables.
new_vars = ["max", "min"]
for v in new_vars:
if v not in self.supp_variables:
M.NumVar(lb=0, ub=M.infinity(), name=v)
self.supp_variables.add(v)
max = M.LookupVariable("max")
min = M.LookupVariable("min")
# Add equal opportunity constraint.
for i, ix_feat in enumerate(pfeat):
Np = np.sum(x_s[:, ix_feat] * y_s)
if Np > 0:
tpr[i] = (1.0 / Np) * M.Sum([x_s[j][ix_feat] * y_s[j] * x[j][1] for j in range(self.n_points)])
M.Add(max >= tpr[i])
M.Add(min <= tpr[i])
M.Add(max-min <= cval, 'equal_opportunity')
elif ctype == 'equalized-odds':
if self.n_classes > 2:
raise ValueError("Constraint 'equalized-odds' is meant for binary classification!")
cvar = ct[1]
pfeat = ct[2]
cval = ct[3]
cstr = EqualizedOdds('ct', self.y, pfeat, cval)
# Additional support variables
new_vars = ["tpr_%d" % i for i in range(len(pfeat))]
for v in new_vars:
if v not in self.supp_variables:
M.NumVar(lb=0, ub=M.infinity(), name=v)
self.supp_variables.add(v)
tpr = [M.LookupVariable(v) for v in new_vars]
# Additional support variables
new_vars = ["fpr_%d" % i for i in range(len(pfeat))]
for v in new_vars:
if v not in self.supp_variables:
M.NumVar(lb=0, ub=M.infinity(), name=v)
self.supp_variables.add(v)
fpr = [M.LookupVariable(v) for v in new_vars]
# Max / Min support variables.
new_vars = ["max_tpr", "min_tpr", "max_fpr", "min_fpr"]
for v in new_vars:
if v not in self.supp_variables:
M.NumVar(lb=0, ub=M.infinity(), name=v)
self.supp_variables.add(v)
max_tpr = M.LookupVariable("max_tpr")
min_tpr = M.LookupVariable("min_tpr")
max_fpr = M.LookupVariable("max_fpr")
min_fpr = M.LookupVariable("min_fpr")
# Add constraint.
for i, ix_feat in enumerate(pfeat):
Np = np.sum(x_s[:, ix_feat] * y_s)
Nn = np.sum((1 - x_s[:, ix_feat]) * y_s)
if Np > 0:
tpr[i] = (1.0 / Np) * M.Sum([x_s[j][ix_feat] * y_s[j] * x[j][1] for j in range(self.n_points)])
if Nn > 0:
fpr[i] = (1.0 / Nn) * M.Sum([(1 - x_s[j][ix_feat]) * y_s[j] * x[j][1] for j in range(self.n_points)])
M.Add(max_tpr >= tpr[i])
M.Add(min_tpr <= tpr[i])
M.Add(max_fpr >= fpr[i])
M.Add(min_fpr <= fpr[i])
M.Add(max_tpr-min_tpr <= cval, 'equalized-odds_pos')
M.Add(max_fpr-min_fpr <= cval, 'equalized-odds_neg')
else:
raise NotImplementedError("Constraint type not recognized " + str(ctype))
......@@ -285,8 +396,8 @@ class MovingTargetClsOT(MovingTarget):
def m_alpha(self, M: Solver, L, y_k, alpha):
x = [
[M.LookupVariable("y_%d_%d" % (j, i)) for i in range(self.n_classes)]
for j in range(self.n_points)
[M.LookupVariable("y_%d_%d" % (i, c)) for c in range(self.n_classes)]
for i in range(self.n_points)
]
y_c = [self.inv_classes[y] for y in self.y]
......@@ -299,14 +410,15 @@ class MovingTargetClsOT(MovingTarget):
raise NotImplementedError("Loss function not recognized!")
obj_func = y_loss + (1.0 / alpha) * p_loss
M.loss = obj_func
M.Minimize(obj_func)
def m_beta(self, M: Solver, L, y_k, beta):
x = [
[M.LookupVariable("y_%d_%d" % (j, i)) for i in range(self.n_classes)]
for j in range(self.n_points)
[M.LookupVariable("y_%d_%d" % (i, c)) for c in range(self.n_classes)]
for i in range(self.n_points)
]
y_c = [self.inv_classes[y] for y in self.y]
yk_c = [self.inv_classes[int(y)] for y in y_k]
if L == 'HD' or L == 'HammingDistance':
......@@ -318,11 +430,10 @@ class MovingTargetClsOT(MovingTarget):
obj_func = y_loss
M.Add(p_loss <= beta)
M.loss = obj_func
M.Minimize(obj_func)
def check_constraints_ext(self, M, C, d):
sat = True
for c in self.constraints:
sat = sat & c.is_satisfied(*d)
......@@ -347,7 +458,7 @@ class MovingTargetClsOT(MovingTarget):
return np.array([self.classes[y] for y in yc])
def initialize_ext(self, d, name='smt_model'):
def initialize_ext(self, d, name='ortools_model'):
x, y = d
......@@ -370,8 +481,8 @@ class MovingTargetClsOT(MovingTarget):
self.n_classes = len(self.classes.keys())
self.n_points = len(y)
[
[mod.IntVar(lb=0, ub=1, name="y_%d_%d" % (j, i)) for i in range(self.n_classes)]
for j in range(self.n_points)]
[mod.IntVar(lb=0, ub=1, name="y_%d_%d" % (i, c)) for c in range(self.n_classes)]
for i in range(self.n_points)]
for i in range(self.n_points):
xpr = mod.Sum([
......@@ -386,3 +497,286 @@ class MovingTargetClsOT(MovingTarget):
return mod
class MovingTargetClsOTCP(MovingTarget):
_INF = int(1e6)
class Model(CpModel):
def __init__(self):
super().__init__()
self.x = None
self.y = None
self.n_points = None
self.classes = None
self.inv_classes = None
self.n_classes = None
self.variables = dict()
self.constraints = list()
def add_variable(self, name, lb, ub):
""" If not present, initializes a new variable in the model and returns it. """
v = self.get_variable(name)
if v is None:
v = self.NewIntVar(lb=lb, ub=ub, name=name)
self.variables[name] = v
return v
def get_variable(self, name):
if name in self.variables:
return self.variables[name]
else:
return None
def add_constraints(self, M: Model, C, x_s, y_s):
"""
Constraints are expressed with a list/set? of tuples, i.e.:
C = [
('>', y, -30),
('fairness', y, 1.0),
]
"""
for ct in C:
ctype = ct[0]
# Translate the constraint to host language.
x = [
[M.get_variable(name="y_%d_%d" % (i, c)) for c in range(M.n_classes)]
for i in range(M.n_points)
]
# Store the constraint in the class.
if ctype == 'balance':
cvar = ct[1]
cval = ct[2]
cstr = BalanceConstraint('ct', cval)
B = int(np.ceil(M.n_points / M.n_classes))
num, den = self.get_fractional_repr(B + cval)
for c in range(M.n_classes):
xpr = LinearExpr.Sum([x[i][c] for i in range(M.n_points)])
M.Add(den * xpr <= num)
elif ctype == 'didi-bin':
cvar = ct[1]
pfeat = ct[2]
cval = ct[3]
cstr = FairnessClsConstraint('ct', pfeat, cval)
abs_val = [M.add_variable(lb=0, ub=self._INF, name="abs_%d" % i)
for i in range(len(np.unique(x_s[:, pfeat])) * M.n_classes)]
# Add fairness constraint.
for ix_feat in pfeat:
xvals = np.unique(x_s[:, ix_feat])
i = 0
for xv in xvals:
mask = 1 * (x_s[:, ix_feat] == xv).reshape(-1, 1)
for c in range(M.n_classes):
Np = np.sum(mask)
if Np > 0:
multiplier = (M.n_points / Np)
tmp = LinearExpr.Sum([mask[j][0] * x[j][c] for j in range(M.n_points)]) - \
LinearExpr.Sum([x[j][c] for j in range(M.n_points)])
num, den = self.get_fractional_repr(multiplier)
# Linearization of the absolute value.
M.Add(den * abs_val[i] >= num * tmp)
M.Add(den * abs_val[i] >= - num * tmp)
i += 1
constraint = LinearExpr.Sum(abs_val)
num, den = self.get_fractional_repr(cval)
M.Add(den * constraint <= num)
elif ctype == 'equal-opportunity':
if M.n_classes > 2:
raise ValueError("Constraint 'equal-opportunity' is meant for binary classification!")
cvar = ct[1]
pfeat = ct[2]
cval = ct[3]
cstr = EqualOpportunity('ct', M.y, pfeat, cval)
# Additional support variables
tpr = [M.add_variable(lb=0, ub=self._INF, name="tpr_%d" % i) for i in range(len(pfeat))]
# Max / Min support variables.
max = M.add_variable(lb=0, ub=self._INF, name='max')
min = M.add_variable(lb=0, ub=self._INF, name='min')
# Add equal opportunity constraint.
for i, ix_feat in enumerate(pfeat):
mask = 1 * (x_s[:, ix_feat] == 1).reshape(-1, 1)
Np = np.sum(x_s[:, ix_feat] * y_s)
if Np > 0:
tpr[i] = LinearExpr.Sum([mask[j][0] * int(y_s[j]) * x[j][1]
for j in range(M.n_points)])
num_Np, den_Np = self.get_fractional_repr(1.0 / Np)
M.Add(den_Np * max >= num_Np * tpr[i])
M.Add(den_Np * min <= num_Np * tpr[i])
num_c, den_c = self.get_fractional_repr(cval)
M.Add(den_c * (max - min) <= num_c)
elif ctype == 'equalized-odds':
if M.n_classes > 2:
raise ValueError("Constraint 'equalized-odds' is meant for binary classification!")
cvar = ct[1]
pfeat = ct[2]
cval = ct[3]
cstr = EqualizedOdds('ct', M.y, pfeat, cval)
# Additional support variables
tpr = [M.add_variable(lb=0, ub=self._INF, name="tpr_%d" % i) for i in range(len(pfeat))]
fpr = [M.add_variable(lb=0, ub=self._INF, name="fpr_%d" % i) for i in range(len(pfeat))]
# Max / Min support variables.
max_tpr = M.add_variable(lb=0, ub=self._INF, name='max_tpr')
min_tpr = M.add_variable(lb=0, ub=self._INF, name='min_tpr')
max_fpr = M.add_variable(lb=0, ub=self._INF, name='max_fpr')
min_fpr = M.add_variable(lb=0, ub=self._INF, name='min_fpr')
# Add constraint.
for i, ix_feat in enumerate(pfeat):
mask = 1 * (x_s[:, ix_feat] == 1).reshape(-1, 1)
Np = np.sum(x_s[:, ix_feat] * y_s)
Nn = np.sum((1 - x_s[:, ix_feat]) * y_s)
if Np > 0:
tpr[i] = LinearExpr.Sum([mask[j][0] * int(y_s[j]) * x[j][1]
for j in range(M.n_points)])
if Nn > 0:
fpr[i] = LinearExpr.Sum([(1 - mask[j][0]) * int(y_s[j]) * x[j][1]
for j in range(M.n_points)])
num_Np, den_Np = self.get_fractional_repr(1.0 / Np)
num_Nn, den_Nn = self.get_fractional_repr(1.0 / Nn)
M.Add(den_Np * max_tpr >= num_Np * tpr[i])
M.Add(den_Np * min_tpr <= num_Np * tpr[i])
M.Add(den_Nn * max_fpr >= num_Nn * fpr[i])
M.Add(den_Nn * min_fpr <= num_Nn * fpr[i])
num_c, den_c = self.get_fractional_repr(cval)
M.Add(den_c * (max_tpr-min_tpr) <= num_c)
M.Add(den_c * (max_fpr-min_fpr) <= num_c)
else:
raise NotImplementedError("Constraint type not recognized " + str(ctype))
M.constraints.append(cstr)
print("Constraint added: " + str(cstr))
def m_alpha(self, M: Model, L, y_k, alpha):
x = [
[M.get_variable(name="y_%d_%d" % (i, c)) for c in range(M.n_classes)]
for i in range(M.n_points)
]
y_c = [M.inv_classes[y] for y in M.y]
yk_c = [M.inv_classes[int(y)] for y in y_k]
if L == 'HD' or L == 'HammingDistance':
y_loss = LinearExpr.Sum([(1 - x[i][y_c[i]]) for i in range(M.n_points)])
p_loss = LinearExpr.Sum([(1 - x[i][yk_c[i]]) for i in range(M.n_points)])
else:
raise NotImplementedError("Loss function not recognized!")
num_alpha, den_alpha = self.get_fractional_repr(1 / alpha)
obj_func = den_alpha * y_loss + num_alpha * p_loss
M.Minimize(obj_func)
def m_beta(self, M: Model, L, y_k, beta):
x = [
[M.get_variable(name="y_%d_%d" % (i, c)) for c in range(M.n_classes)]
for i in range(M.n_points)
]
y_c = [M.inv_classes[y] for y in M.y]
yk_c = [M.inv_classes[int(y)] for y in y_k]
if L == 'HD' or L == 'HammingDistance':
y_loss = LinearExpr.Sum([(1 - x[i][y_c[i]]) for i in range(M.n_points)])
p_loss = LinearExpr.Sum([(1 - x[i][yk_c[i]]) for i in range(M.n_points)])
else:
raise NotImplementedError("Loss function not recognized!")
num_beta, den_beta = self.get_fractional_repr(beta)
obj_func = y_loss
M.Add(den_beta * p_loss <= num_beta)
M.Minimize(obj_func)
def check_constraints_ext(self, M: Model, C, d):
sat = True
for c in M.constraints:
sat = sat & c.is_satisfied(*d)
print("Constraint satisfaction: " + str(sat))
return sat
def solve_ext(self, M: Model):
print("Solving")
solver = CpSolver()
solver_status = solver.Solve(M)
if solver_status not in (cp_model.FEASIBLE, cp_model.OPTIMAL):
raise ValueError("Model could not be solved! " + str(solver.ResponseStats()))
# Retrieve solution.
y_opt = np.array([
[
solver.Value(M.get_variable("y_%d_%d" % (i, c))) for c in range(M.n_classes)
] for i in range(M.n_points)])
yc = np.argmax(y_opt, axis=1)
return np.array([M.classes[y] for y in yc])
def initialize_ext(self, d, name='cp-sat_model'):
print("Initializing CP-SAT Solver")
x, y = d