Commit f5ca5bc4 authored by Fabrizio Detassis's avatar Fabrizio Detassis
Browse files

small mods

parent 7c403eca
import numpy as np
from aiddl_core.representation.symbolic import Symbolic
from moving_target_abc import MovingTarget
from docplex.mp.model import Model as CPModel
from docplex.mp.model import DOcplexException
from constraint import InequalityRegGlobalConstraint, InequalityClsGlobalConstraint
from constraint import InequalityRegGlobalConstraint
from constraint import FairnessClsConstraint, FairnessRegConstraint
from constraint import EqualOpportunity, BalanceConstraint, EqualizedOdds
import utils
class MovingTargetClsCplex(MovingTarget):
......@@ -88,27 +86,26 @@ class MovingTargetClsCplex(MovingTarget):
cvar = c[1]
pfeat = c[2]
cval = c[3]
cstr = EqualOpportunity('ct', pfeat, cval)
cstr = EqualOpportunity('ct', self.y, pfeat, cval)
tpr = M.continuous_var_list(keys=len(pfeat), name='tpr')
# Add fairness constraint.
for i, ix_feat in enumerate(pfeat):
Np = np.sum(x_s[:, ix_feat] * y_s)
print("Feat " + str(ix_feat) + " Np " + str(Np))
if Np > 0:
tpr[i] = (1.0 / Np) * M.sum([x_s[j][ix_feat] * y_s[j] * x[j][1] for j in range(self.n_points)])
M.add_constraint(M.max(tpr)-M.min(tpr) <= cval, 'equal_opportunity')
M.add_constraint(M.max(tpr)-M.min(tpr) <= cval, 'equal_opportunity')
elif ctype == 'equalized-odds':
if self.n_classes > 2:
raise ValueError("Constraint 'equal-opportunity' is meant for binary classification!")
raise ValueError("Constraint 'equalized-odds' is meant for binary classification!")
cvar = c[1]
pfeat = c[2]
cval = c[3]
cstr = EqualizedOdds('ct', pfeat, cval)
cstr = EqualizedOdds('ct', self.y, pfeat, cval)
tpr = M.continuous_var_list(keys=len(pfeat), name='tpr')
fpr = M.continuous_var_list(keys=len(pfeat), name='fpr')
......@@ -117,20 +114,19 @@ class MovingTargetClsCplex(MovingTarget):
for i, ix_feat in enumerate(pfeat):
Np = np.sum(x_s[:, ix_feat] * y_s)
Nn = np.sum((1 - x_s[:, ix_feat]) * y_s)
print("Feat " + str(ix_feat) + " Np " + str(Np))
if Np > 0:
tpr[i] = (1.0 / Np) * M.sum([x_s[j][ix_feat] * y_s[j] * x[j][1] for j in range(self.n_points)])
if Nn > 0:
fpr[i] = (1.0 / Nn) * M.sum([(1 - x_s[j][ix_feat]) * y_s[j] * x[j][1] for j in range(self.n_points)])
M.add_constraint(M.max(tpr)-M.min(tpr) <= cval, 'equal_opportunity_pos')
M.add_constraint(M.max(fpr)-M.min(fpr) <= cval, 'equal_opportunity_neg')
M.add_constraint(M.max(tpr)-M.min(tpr) <= cval, 'equalized-odds_pos')
M.add_constraint(M.max(fpr)-M.min(fpr) <= cval, 'equalized-odds_neg')
else:
raise NotImplementedError("Constraint type not recognized " + str(ctype))
self.constraints.append(cstr)
#print("Constraint added: " + str(cstr))
print("Constraint added: " + str(cstr))
def m_alpha(self, M, L, y_k, alpha):
......@@ -158,8 +154,8 @@ class MovingTargetClsCplex(MovingTarget):
[
M.get_var_by_name("y_%s_%s" % (i, c)) for c in range(self.n_classes)
] for i in range(self.n_points)]
y_k = y_k.reshape(-1)
y_k = y_k.reshape(-1)
y_c = [self.inv_classes[y] for y in self.y]
yk_c = [self.inv_classes[int(y)] for y in y_k]
if L == 'HD' or L == 'HammingDistance':
......@@ -179,7 +175,7 @@ class MovingTargetClsCplex(MovingTarget):
for c in self.constraints:
sat = sat & c.is_satisfied(*d)
#print("Constraint satisfaction: " + str(sat))
print("Constraint satisfaction: " + str(sat))
return sat
def solve_ext(self, M):
......
import numpy as np
import utils
from aiddl_core.parser.parser import parse_term
from aiddl_core.representation.symbolic import Symbolic
import aiddl_core.function.default as dfun
from aiddl_core.container.container import Container
from scikit_learn_wrapper import SciKitLearnFunction, splitAiddlMlProblem
from sklearn.metrics import confusion_matrix, accuracy_score
from tools import CsvLoader
import factory
def test_fairness_reg(solver):
n_points = 30
......@@ -86,157 +97,287 @@ def test_fairness_reg(solver):
def test_bal_cls(solver):
# Test of the moving-target loop with a class-balance constraint.
# NOTE(review): this scrape is a commit-diff view with indentation lost; the
# lines below appear to concatenate the removed (synthetic-data) body and the
# added (redwine CSV) body of this test — confirm against the repository.
# Code left byte-identical; only comments were added.
n_points = 15
# Generation of synthetic data.
x = np.random.rand(n_points, 1)
y = np.clip(np.random.poisson(lam=3, size=(n_points, 1)), a_min=0, a_max=2)
d = x, y
# Loss function.
L = 'HD'
# Constraints declaration.
C = [
# ("<=", 'y', 1.2),
# (">=", 'y', 0.3),
("balance", 'y', 0),
]
# Params.
alpha = 1
beta = 1
print("Original data: ")
x, y = d
for i in range(len(x)):
print("%.2f \t %.2f" % (x[i], y[i]))
movt = utils.get_problem('classification', solver)
m = movt(None, n=1)
# CPLEX model initialization.
cplex_mod = m.initialize_ext(d)
# Add constraints.
m.add_constraints(cplex_mod, C, x, y)
# Check constraint satisfaction.
sat = m.check_constraints_ext(cplex_mod, C, d)
if not sat:
# Alpha step.
m.m_alpha(cplex_mod, L, y, alpha)
else:
# Beta step.
m.m_beta(cplex_mod, L, y, beta)
# Model solving.
y_new = m.solve_ext(cplex_mod)
print()
print("Final data: ")
print("x \t y \t y_new")
for i in range(len(x)):
print("%.2f \t %.2f \t %.2f" % (x[i], y[i], y_new[i]))
print()
# Balance is measured as the std-dev of the normalized label counts
# (0 means perfectly balanced classes).
lab, cnts = np.unique(y, return_counts=True)
std = np.std(cnts / np.sum(cnts))
print("Initial balance: %.2f" % std)
lab, cnts = np.unique(y_new, return_counts=True)
std = np.std(cnts / np.sum(cnts))
print("Final balance: %.2f" % std)
# NOTE(review): from here on this looks like the NEWER variant of the test,
# driving the full moving-target iteration on the redwine dataset.
C = Container()
F = dfun.get_default_function_registry(C)
loader = CsvLoader()
# Path is relative to the script's working directory — TODO confirm layout.
train_data = loader.apply(parse_term('("../../resources/redwine_train.csv" "," "quality")'))
print("Label:", train_data[Symbolic("label")])
x, y = splitAiddlMlProblem(train_data)
length = len(x[0])
print(length)
# Learner specifications (AIDDL terms wrapping scikit-learn estimators);
# only linear_learner is actually registered below.
ens_learner = parse_term('''{
py_module:sklearn.ensemble
py_class:RandomForestClassifier
n_estimators:50
max_depth:3
}''')
linear_learner = parse_term('''{
py_module:sklearn.linear_model
py_class:SGDClassifier
}''')
F = dfun.get_default_function_registry(C)
f_ML = SciKitLearnFunction(linear_learner)
F.add_function(Symbolic("mt.learner"), f_ML)
x, y = splitAiddlMlProblem(train_data)
# Initial (unconstrained) predictions of the wrapped learner.
y_0 = f_ML.apply(train_data)
mt_cfg = parse_term('''(
n:10 ;; Number of iterations
alpha:0.1 ;; Parameter alpha
beta:1 ;; Parameter beta
problem-type:classification ;; classification/regression?
constraint-solver:cplex ;; Select constraint solver (cplex/pysmt)
learner:mt.learner ;; URI of learner (registered above)
)''')
mt_data = parse_term('''{
loss-function:HammingDistance ;; Select loss function
constraints:{
(balance "quality" 0.05)
}
}''')
mt_data = mt_data.put_all(train_data)
P = factory.get_problem('classification', solver)
mtc = P(mt_cfg, F)
y_k = mtc.apply(mt_data)
# Report per-iteration accuracy and balance over the prediction history.
i = 0
for y_k in mtc.y_k_history:
print("<<< Iteration %d >>>" % i)
lab, cnts = np.unique(y_k, return_counts=True)
std_k = np.std(cnts / np.sum(cnts))
acc_k = accuracy_score(y_k, np.array(y))
print("Accuracy: %.3f - Std_k: %.3f" % (acc_k, std_k))
i += 1
def test_fairness_cls(solver):
# Test of the moving-target loop with a DIDI (disparate impact) fairness
# constraint on a binary protected feature.
# NOTE(review): commit-diff scrape with indentation lost; the lines below
# appear to concatenate the removed (synthetic-data) body and the added
# (adult CSV) body — confirm against the repository. Code byte-identical.
n_points = 50
# Index of the protected feature column.
pfeat = [1]
# Generation of synthetic data.
x1 = np.hstack([np.random.rand(n_points, 1),
np.full(shape=(n_points, 1), fill_value=0)])
x2 = np.hstack([np.random.rand(n_points, 1),
np.full(shape=(n_points, 1), fill_value=1)])
x = np.vstack([x1, x2])
# Two groups drawn with different Poisson rates so they are deliberately
# imbalanced, then clipped to the label range {0, 1, 2}.
y1 = np.random.poisson(lam=1, size=(n_points, 1))
y2 = np.random.poisson(lam=3, size=(n_points, 1))
y1 = np.clip(y1, a_min=0, a_max=2)
y2 = np.clip(y2, a_min=0, a_max=2)
y = np.vstack([y1, y2])
d = x, y
# Loss function.
L = 'HD'
# DIDI of the raw training data; the constraint below asks for half of it.
didi_tr = utils.didi_c(x, y, pfeat)
# Constraints declaration.
C = [
# ("<=", 'y', 1.2),
# (">=", 'y', 0.3),
("didi-bin", 'y', pfeat, 0.5 * didi_tr),
]
# Params.
alpha = 1
beta = 1
print("Original data: ")
x, y = d
for i in range(len(x)):
print("%.2f \t %.2f" % (x[i, 0], y[i]))
movt = utils.get_problem('classification', solver)
m = movt(None, n=3)
# CPLEX model initialization.
cplex_mod = m.initialize_ext(d)
# Add constraints.
m.add_constraints(cplex_mod, C, x, y)
# Check constraint satisfaction.
sat = m.check_constraints_ext(cplex_mod, C, d)
if not sat:
# Alpha step.
m.m_alpha(cplex_mod, L, y, alpha)
else:
# Beta step.
m.m_beta(cplex_mod, L, y, beta)
# Model solving.
y_new = m.solve_ext(cplex_mod)
print()
print("Final data: ")
print("(x1, x2) \t y \t y_new")
for i in range(len(x)):
print("(%.2f, %d) \t %.2f \t %.2f" % (x[i, 0], x[i, 1], y[i], y_new[i]))
print("Original data: didi = %.2f" % utils.didi_c(x, y, pfeat))
print("Tot: %.2f \t x2 = 0: %.2f \t x2 = 1: %.2f" % (np.mean(y),
np.mean(y[x[:, 1] == 0]),
np.mean(y[x[:, 1] == 1])))
print("New data: didi = %.2f" % utils.didi_c(x, y_new, pfeat))
print("Tot: %.2f \t x2 = 0: %.2f \t x2 = 1: %.2f" % (np.mean(y_new),
np.mean(y_new[x[:, 1] == 0]),
np.mean(y_new[x[:, 1] == 1])))
print("Data: ")
print("(y, x1, x2)")
for i in range(len(x)):
print("(%d, %.5f, %d)" % (y[i], x[i, 0], x[i, 1]))
# NOTE(review): from here on this looks like the NEWER variant of the test,
# running the full moving-target iteration on the adult/income dataset.
C = Container()
F = dfun.get_default_function_registry(C)
loader = CsvLoader()
# Path is relative to the script's working directory — TODO confirm layout.
train_data = loader.apply(parse_term('("../../resources/adult_train.csv" "," "income")'))
print("Label:", train_data[Symbolic("label")])
x, y = splitAiddlMlProblem(train_data)
length = len(x[0])
print("Features: ", length)
# Learner specifications (AIDDL terms wrapping scikit-learn estimators);
# only linear_learner is actually registered below.
ens_learner = parse_term('''{
py_module:sklearn.ensemble
py_class:RandomForestClassifier
n_estimators:50
max_depth:3
}''')
linear_learner = parse_term('''{
py_module:sklearn.linear_model
py_class:SGDClassifier
}''')
F = dfun.get_default_function_registry(C)
f_ML = SciKitLearnFunction(linear_learner)
F.add_function(Symbolic("mt.learner"), f_ML)
x, y = splitAiddlMlProblem(train_data)
# Initial (unconstrained) predictions of the wrapped learner.
y_0 = f_ML.apply(train_data)
# Presumably the one-hot race columns in the design matrix — verify indices.
ix_feat = [46, 45, 44, 43, 42]
didi_0 = utils.didi_c(np.array(x), np.array(y), ix_feat)
print("Initial DIDI: %.2f" % didi_0)
mt_cfg = parse_term('''(
n:10 ;; Number of iterations
alpha:1 ;; Parameter alpha
beta:1 ;; Parameter beta
problem-type:classification ;; classification/regression?
constraint-solver:cplex ;; Select constraint solver (cplex/pysmt)
learner:mt.learner ;; URI of learner (registered above)
)''')
# Constraint threshold is 20% of the initial DIDI, interpolated into the term.
mt_data = parse_term('''{
loss-function:HammingDistance ;; Select loss function
constraints:{
(didi-bin
"income"
["race_Amer-Indian-Eskimo" "race_Asian-Pac-Islander" "race_Black" "race_Other" "race_White"]
%.2f)
}
}''' % (0.2 * didi_0))
mt_data = mt_data.put_all(train_data)
P = factory.get_problem('classification', solver)
mtc = P(mt_cfg, F)
y_k = mtc.apply(mt_data)
# Report per-iteration DIDI and accuracy over the prediction history.
i = 0
for y_k in mtc.y_k_history:
print("<<< Iteration %d >>>" % i)
didi_k = utils.didi_c(np.array(x), y_k, [46, 45, 44, 43, 42])
acc_k = accuracy_score(y_k, np.array(y))
print("didi = %.3f, acc = %.3f" % (didi_k, acc_k))
print()
i += 1
def test_eq_opp_cls(solver):
    """Moving-target classification on the adult/income dataset with an
    equal-opportunity constraint over the one-hot race indicator features.

    Trains an SGD classifier wrapped as an AIDDL function, runs the
    moving-target loop, and prints per-iteration, per-group confusion
    matrices and TPRs, followed by the equal-opportunity violation and
    the accuracy of each iterate.

    :param solver: constraint-solver identifier passed to
        factory.get_problem (e.g. 'cplex', 'ortools-cp').
    """
    C = Container()
    F = dfun.get_default_function_registry(C)
    loader = CsvLoader()
    # NOTE(review): path is relative to the working directory — confirm layout.
    train_data = loader.apply(parse_term('("../../resources/adult_train.csv" "," "income")'))
    print("Label:", train_data[Symbolic("label")])
    x, y = splitAiddlMlProblem(train_data)
    length = len(x[0])
    print(length)
    # Learner specification (AIDDL term wrapping a scikit-learn estimator).
    # The unused RandomForest spec from the original was removed.
    linear_learner = parse_term('''{
py_module:sklearn.linear_model
py_class:SGDClassifier
}''')
    f_ML = SciKitLearnFunction(linear_learner)
    F.add_function(Symbolic("mt.learner"), f_ML)
    # Initial (unconstrained) predictions of the wrapped learner.
    y_0 = f_ML.apply(train_data)
    mt_cfg = parse_term('''(
n:6 ;; Number of iterations
alpha:0.1 ;; Parameter alpha
beta:1 ;; Parameter beta
problem-type:classification ;; classification/regression?
constraint-solver:cplex ;; Select constraint solver (cplex/pysmt)
learner:mt.learner ;; URI of learner (registered above)
)''')
    mt_data = parse_term('''{
loss-function:HammingDistance ;; Select loss function
constraints:{
(equal-opportunity "income" ["race_Amer-Indian-Eskimo" "race_Asian-Pac-Islander" "race_Black" "race_Other" "race_White"] 0.05)
}
}''')
    mt_data = mt_data.put_all(train_data)
    P = factory.get_problem('classification', solver)
    mtc = P(mt_cfg, F)
    y_k = mtc.apply(mt_data)
    # Presumably the one-hot race columns in the design matrix — verify indices.
    ix_feat = [46, 45, 44, 43, 42]
    for i, y_k in enumerate(mtc.y_k_history):
        print("<<< Iteration %d >>>" % i)
        for ix in ix_feat:
            mask = np.array(x)[:, ix] == 1
            # print("Feat " + str(ix) + " Pts " + str(np.sum(mask)))
            print()
            print("Confusion Matrix", confusion_matrix(np.array(y)[mask], y_k[mask]))
            print("TPR: %.2f" % (np.sum(np.array(y)[mask] * y_k[mask]) / np.sum(np.array(y)[mask])))
        # Reuse ix_feat instead of repeating the literal index list (was
        # duplicated as [46, 45, 44, 43, 42] in the original).
        didi_k = utils.equal_opportunity(np.array(x), np.array(y), y_k, ix_feat)
        acc_k = accuracy_score(y_k, np.array(y))
        print("eq_opp = %.3f, acc = %.3f" % (didi_k, acc_k))
        print()
def test_eq_odd_cls(solver):
    """Moving-target classification on the adult/income dataset with an
    equalized-odds constraint over the one-hot race indicator features.

    Trains a RandomForest classifier wrapped as an AIDDL function, runs the
    moving-target loop, and prints per-iteration, per-group confusion
    matrices, TPRs and FPRs, followed by the equalized-odds violation and
    the accuracy of each iterate.

    :param solver: constraint-solver identifier passed to
        factory.get_problem (e.g. 'cplex', 'ortools-cp').
    """
    C = Container()
    F = dfun.get_default_function_registry(C)
    loader = CsvLoader()
    # NOTE(review): path is relative to the working directory — confirm layout.
    train_data = loader.apply(parse_term('("../../resources/adult_train.csv" "," "income")'))
    print("Label:", train_data[Symbolic("label")])
    x, y = splitAiddlMlProblem(train_data)
    length = len(x[0])
    print(length)
    # Learner specification (AIDDL term wrapping a scikit-learn estimator).
    # The unused SGDClassifier spec from the original was removed.
    ens_learner = parse_term('''{
py_module:sklearn.ensemble
py_class:RandomForestClassifier
n_estimators:50
max_depth:3
}''')
    f_ML = SciKitLearnFunction(ens_learner)
    F.add_function(Symbolic("mt.learner"), f_ML)
    # Initial (unconstrained) predictions of the wrapped learner.
    y_0 = f_ML.apply(train_data)
    mt_cfg = parse_term('''(
n:10 ;; Number of iterations
alpha:0.1 ;; Parameter alpha
beta:1 ;; Parameter beta
problem-type:classification ;; classification/regression?
constraint-solver:ortools-cp ;; Select constraint solver (cplex/pysmt)
learner:mt.learner ;; URI of learner (registered above)
)''')
    mt_data = parse_term('''{
loss-function:HammingDistance ;; Select loss function
constraints:{
(equalized-odds
"income"
["race_Amer-Indian-Eskimo" "race_Asian-Pac-Islander" "race_Black" "race_Other" "race_White"]
0.05)
}
}''')
    mt_data = mt_data.put_all(train_data)
    P = factory.get_problem('classification', solver)
    mtc = P(mt_cfg, F)
    y_k = mtc.apply(mt_data)
    # Presumably the one-hot race columns in the design matrix — verify indices.
    ix_feat = [46, 45, 44, 43, 42]
    for i, y_k in enumerate(mtc.y_k_history):
        print("<<< Iteration %d >>>" % i)
        for ix in ix_feat:
            mask = np.array(x)[:, ix] == 1
            # print("Feat " + str(ix) + " Pts " + str(np.sum(mask)))
            print("Confusion Matrix", confusion_matrix(np.array(y)[mask], y_k[mask]))
            print("TPR: %.2f" % (np.sum(np.array(y)[mask] * y_k[mask]) / np.sum(np.array(y)[mask])))
            print("FPR: %.2f" % (np.sum((1-np.array(y))[mask] * y_k[mask]) / np.sum((1-np.array(y))[mask])))
        # Reuse ix_feat instead of repeating the literal index list (was
        # duplicated as [46, 45, 44, 43, 42] in the original).
        didi_k = utils.equalized_odds(np.array(x), np.array(y), y_k, ix_feat)
        acc_k = accuracy_score(y_k, np.array(y))
        print("eq_odds = %.3f, acc = %.3f" % (didi_k, acc_k))
        print()
# Script entry point: pick a constraint solver and run one of the tests.
if __name__ == '__main__':
# NOTE(review): commit-diff residue — the next two lines look like the
# REMOVED version of the entry point and the block after them like the
# ADDED one; only one of the two should survive in the real file.
solver = 'smt'
test_fairness_reg(solver)
solver = 'ortools-cp'
# test_fairness_reg(solver)
# test_bal_cls(solver)
# test_fairness_cls(solver)
# test_eq_opp_cls(solver)
test_eq_odd_cls(solver)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment