Commit 79a115a3 authored by Uwe Köckemann's avatar Uwe Köckemann

Merge branch 'master' of gitsvn-nt.oru.se:uwe.kockemann/moving-targets

parents 9f8b084e 3a51de7b
@@ -65,12 +65,12 @@
 (#tuple moving-targets-problem
 (
 examples:$examples ;; Regular supervised ML problem
-loss-function:MeanSquaredError ;; Select available loss function
+loss-function:MeanAbsoluteError ;; Select available loss function
 constraints:{
 (>= y -30) ;; Each element in set needs to be translated to PySMT
-(didi-real y [x] 0.5)
-(didi-bin y [x] 0.5)
-(balance y 0.1)
+;; (didi-real y [x] 0.5)
+;; (didi-bin y [x] 0.5)
+;; (balance y 0.1)
 }
 type:classification
 alpha:1 ;; Parameter alpha
...
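Each moving-targets-problem tuple pairs a supervised learning problem (examples) with a loss function, a set of constraint expressions, and the alpha/beta parameters of the master step. As the inline comments note, every constraint expression - e.g. (>= y -30), didi-real, didi-bin, balance - has to be translated to the backend solver (PySMT or CPLEX) by the Python code further down.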
(#mod self org.aiddl.example.moving-target.test-01)
(#nms EVAL org.aiddl.eval-ht)
(#req SL org.aiddl.common.learning.supervised)
(^RegressionProblem@SL examples
(
attributes:[(y ^#real) (x1 ^#real) (x2 ^#real)]
label:y
data:[
(2 0.46876 0)
(1 0.68725 0)
(2 0.70889 0)
(2 0.24617 0)
(2 0.64563 0)
(2 0.20468 0)
(1 0.37330 0)
(1 0.58604 0)
(1 0.42809 0)
(0 0.05272 0)
(2 0.25810 0)
(1 0.15931 0)
(0 0.58373 0)
(0 0.15215 0)
(1 0.31580 0)
(1 0.48765 0)
(2 0.23271 0)
(0 0.33075 0)
(0 0.02454 0)
(0 0.27108 0)
(1 0.82266 0)
(0 0.65037 0)
(0 0.11161 0)
(2 0.50958 0)
(0 0.80177 0)
(2 0.28616 0)
(1 0.74214 0)
(0 0.93101 0)
(0 0.57490 0)
(2 0.28891 0)
(0 0.02450 0)
(0 0.29775 0)
(0 0.08457 0)
(1 0.57290 0)
(1 0.98779 0)
(0 0.21251 0)
(1 0.30324 0)
(0 0.37703 0)
(0 0.15422 0)
(2 0.90874 0)
(0 0.52846 0)
(0 0.03465 0)
(2 0.66185 0)
(1 0.18609 0)
(0 0.22061 0)
(2 0.07671 0)
(2 0.89159 0)
(2 0.34661 0)
(2 0.31532 0)
(2 0.97554 0)
(0 0.63249 1)
(1 0.73163 1)
(2 0.41826 1)
(2 0.35865 1)
(2 0.67698 1)
(1 0.01482 1)
(0 0.71497 1)
(2 0.57192 1)
(1 0.06882 1)
(1 0.47431 1)
(2 0.12842 1)
(0 0.38252 1)
(2 0.35838 1)
(1 0.14697 1)
(2 0.10600 1)
(2 0.27471 1)
(0 0.59134 1)
(2 0.36586 1)
(2 0.89406 1)
(2 0.38183 1)
(2 0.42676 1)
(1 0.27430 1)
(1 0.41646 1)
(2 0.65456 1)
(1 0.71556 1)
(2 0.97935 1)
(2 0.48944 1)
(1 0.09128 1)
(2 0.93509 1)
(2 0.80899 1)
(2 0.29443 1)
(2 0.75088 1)
(2 0.74210 1)
(2 0.54209 1)
(1 0.73187 1)
(2 0.15325 1)
(2 0.55892 1)
(2 0.18299 1)
(1 0.97745 1)
(2 0.85193 1)
(2 0.91503 1)
(1 0.69043 1)
(1 0.20993 1)
(2 0.79305 1)
(2 0.68840 1)
(2 0.87589 1)
(2 0.90116 1)
(2 0.30976 1)
(2 0.79235 1)
(1 0.65276 1)
]
))
(#tuple moving-targets-problem
(
examples:$examples ;; Regular supervised ML problem
loss-function:HammingDistance ;; Select available loss function
constraints:{
;; (>= y -30) ;; Each element in set needs to be translated to PySMT
;; (didi-real y [x2] 0.5)
;; (didi-bin y [x2] 0.5)
(balance y 0.1)
}
alpha:1 ;; Parameter alpha
beta:1 ;; Parameter beta
))
\ No newline at end of file
(#mod self org.aiddl.example.moving-target.test-01)
(#nms EVAL org.aiddl.eval-ht)
(#req SL org.aiddl.common.learning.supervised)
(^RegressionProblem@SL examples
(
attributes:[(y ^#real) (x1 ^#real) (x2 ^#real)]
label:y
data:[
(0.14399 0.93351 0)
(0.10569 0.57763 0)
(0.15547 0.20965 0)
(0.01678 0.36641 0)
(0.05720 0.67262 0)
(0.11661 0.97933 0)
(0.12481 0.91705 0)
(0.20830 0.58882 0)
(0.00994 0.46877 0)
(0.07117 0.31546 0)
(0.00000 0.88777 0)
(0.22190 0.18996 0)
(0.17124 0.20468 0)
(0.05489 0.86582 0)
(0.14873 0.49931 0)
(0.10313 0.50076 0)
(0.12818 0.14048 0)
(0.16510 0.21265 0)
(0.04335 0.14884 0)
(0.05335 0.43776 0)
(0.09383 0.35457 0)
(0.09007 0.01122 0)
(0.10815 0.98088 0)
(0.09873 0.99758 0)
(0.16206 0.81442 0)
(0.08034 0.84978 0)
(0.04674 0.95845 0)
(0.16197 0.05119 0)
(0.17001 0.54125 0)
(0.20784 0.02575 0)
(0.13035 0.01298 0)
(0.19193 0.22368 0)
(0.06557 0.39587 0)
(0.22974 0.46312 0)
(0.17473 0.01764 0)
(0.09336 0.47501 0)
(0.01593 0.99950 0)
(0.06919 0.15301 0)
(0.16508 0.29693 0)
(0.21398 0.68030 0)
(0.10951 0.11171 0)
(0.06320 0.33794 0)
(0.08470 0.75342 0)
(0.11798 0.04564 0)
(0.04152 0.84485 0)
(0.04467 0.38008 0)
(0.07386 0.00816 0)
(0.13453 0.88137 0)
(0.08867 0.15829 0)
(0.10146 0.96530 0)
(0.60087 0.58609 1)
(0.28802 0.80639 1)
(0.45752 0.34234 1)
(0.40707 0.20283 1)
(0.36236 0.62417 1)
(0.55087 0.69895 1)
(0.73721 0.68265 1)
(0.38855 0.04170 1)
(0.51397 0.57022 1)
(0.29755 0.98608 1)
(0.38471 0.47996 1)
(0.26678 0.77691 1)
(0.64121 0.78459 1)
(0.39326 0.76393 1)
(0.90136 0.15291 1)
(0.35579 0.51981 1)
(0.66407 0.49847 1)
(0.38942 0.19926 1)
(0.40315 0.85785 1)
(0.45099 0.16807 1)
(0.30725 0.20619 1)
(0.44424 0.96056 1)
(0.66612 0.87987 1)
(0.27660 0.81143 1)
(0.46951 0.96977 1)
(0.32858 0.52235 1)
(0.74554 0.22402 1)
(0.48748 0.90914 1)
(0.55029 0.86873 1)
(0.64465 0.19226 1)
(0.75841 0.44601 1)
(0.30840 0.04392 1)
(0.40345 0.90458 1)
(0.06413 0.60732 1)
(0.55159 0.12752 1)
(0.53627 0.48400 1)
(0.37135 0.60513 1)
(0.42941 0.65884 1)
(0.75241 0.74990 1)
(0.63155 0.87993 1)
(0.21681 0.10326 1)
(0.19754 0.25903 1)
(0.63268 0.73537 1)
(0.28253 0.35073 1)
(0.43952 0.96812 1)
(0.95463 0.01418 1)
(0.49971 0.49996 1)
(0.18556 0.20309 1)
(1.00000 0.23667 1)
(0.38108 0.75320 1)
]
))
(#tuple moving-targets-problem
(
examples:$examples ;; Regular supervised ML problem
loss-function:MeanSquaredError ;; Select available loss function
constraints:{
(>= y -30) ;; Each element in set needs to be translated to PySMT
(didi-real y [x2] 0.5)
;; (didi-bin y [x] 0.5)
;; (balance y 0.1)
}
alpha:1 ;; Parameter alpha
beta:1 ;; Parameter beta
))
\ No newline at end of file
# Input parameters:
# - n_points -> number of examples per protected group
# - pfeat -> column indices of the protected features
import numpy as np
from docplex.mp.model import Model as CPModel
def didi_r(x, y, pfeat):
    """
    Compute the disparate impact discrimination index (DIDI) of a dataset.

    For every value of every protected feature, take the absolute gap between
    the group mean of y and the overall mean of y, and sum up the gaps.
    """
    n_points = len(y)
    tot = .0
    for ix_feat in pfeat:
        vals = np.unique(x[:, ix_feat])
        for v in vals:
            # 0/1 column mask selecting the examples of this group.
            mask = (1 * (x[:, ix_feat] == v)).reshape(-1, 1)
            yp = mask * y
            Np = np.sum(mask)
            if Np > 0:
                tmp = (1.0 / Np) * np.sum(yp) - \
                      (1.0 / n_points) * np.sum(y)
                tot += np.abs(tmp)
    return tot
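# A toy check of didi_r (hypothetical numbers, not part of the experiment):
# two groups of two points with group means 0.5 and 1.0 against an overall
# mean of 0.75 give DIDI = |0.5 - 0.75| + |1.0 - 0.75| = 0.5.
#
#   x_toy = np.array([[0.], [0.], [1.], [1.]])
#   y_toy = np.array([[1.], [0.], [1.], [1.]])
#   assert np.isclose(didi_r(x_toy, y_toy, [0]), 0.5)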
n_points = 50
pfeat = [1]  # index of the protected (group indicator) feature
# Two groups of n_points examples: a random feature plus a 0/1 group column.
x1 = np.hstack([np.random.rand(n_points, 1),
                np.full(shape=(n_points, 1), fill_value=0)])
x2 = np.hstack([np.random.rand(n_points, 1),
                np.full(shape=(n_points, 1), fill_value=1)])
x = np.vstack([x1, x2])
# Targets drawn from a different distribution per group, rescaled to [0, 1].
y1 = np.random.normal(loc=0.0, scale=1.0, size=(n_points, 1))
y2 = np.random.normal(loc=5.0, scale=3.0, size=(n_points, 1))
y = np.vstack([y1, y2])
y = (y - y.min()) / (y.max() - y.min())
# Require the adjusted targets to bring DIDI down to 60% of its training value.
didi_tr = didi_r(x, y, pfeat)
constraint_value = .6 * didi_tr
print("DIDI train: %.3f -> CT value: %.3f" % (didi_tr, constraint_value))
# Build a model.
mod = CPModel('Fairness problem')

# Variable declaration: one continuous target z_i per example.
n_points = len(y)
idx_var = [i for i in range(n_points)]
z = mod.continuous_var_list(keys=idx_var, lb=0.0, ub=1.0, name='z')

# Fairness constraint: instead of adding a penalization term to the objective
# function - as done by Phebe et al. - the fairness term is required to stay
# below a fixed threshold.
constraint = .0
abs_val = mod.continuous_var_list(keys=len(np.unique(x[:, pfeat])), name='abs_val')
# Add the fairness constraint.
for ix_feat in pfeat:
    vals = np.unique(x[:, ix_feat])
    for i, v in enumerate(vals):
        mask = 1 * (x[:, ix_feat] == v).reshape(-1, 1)
        Np = np.sum(mask)
        if Np > 0:
            # Gap between the overall mean and the group mean of the new
            # targets z. Note: the 0/1 mask has to weight z here; weighting
            # by the masked labels (mask * y) would fold the original y into
            # the group sum and no longer match didi_r above.
            tmp = (1.0 / n_points) * mod.sum(z) - \
                  (1.0 / Np) * mod.sum([mask[j][0] * z[j] for j in idx_var])
            # Linearization of the absolute value.
            mod.add_constraint(abs_val[i] >= tmp)
            mod.add_constraint(abs_val[i] >= -tmp)
constraint += mod.sum(abs_val)
mod.add_constraint(constraint <= constraint_value, ctname='fairness_cnst')
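# Why the linearization is sound here: abs_val[i] >= tmp and abs_val[i] >= -tmp
# together force abs_val[i] >= |tmp|, and since abs_val only appears on the
# bounded side of "sum(abs_val) <= constraint_value", the solver can always
# set abs_val[i] = |tmp|; the constraint is therefore equivalent to bounding
# the true DIDI of z by constraint_value.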
# Objective function: mean squared distance between original and new targets.
y_loss = (1.0 / n_points) * mod.sum([(y[i][0] - z[i]) * (y[i][0] - z[i]) for i in idx_var])
# y_loss = (1.0 / n_points) * mod.sum([mod.abs(y[i][0] - z[i]) for i in idx_var])
mod.minimize(y_loss)

# Solve the problem.
sol = mod.solve()
if sol:
    # Query the solution only after checking that one was actually found.
    print("Objective fct value: %.2f" % sol.get_value(y_loss))
    print("Fairness ct: ", sol.get_value(constraint))
    sat = mod.get_solve_status()
    print("Status: " + str(sat))
    y_new = np.array([
        sol.get_value(z[i]) for i in range(n_points)
    ], dtype=float).reshape(-1, 1)
    print()
    print("Final data: ")
    print("(x1, x2) \t y \t y_new")
    for i in range(len(x)):
        print("(%.2f, %d) \t %.2f \t %.2f" % (x[i, 0], x[i, 1], y[i, 0], y_new[i, 0]))
    print("Original data: didi = %.2f" % didi_r(x, y, pfeat))
    print("Tot: %.2f \t x2 = 0: %.2f \t x2 = 1: %.2f" % (np.mean(y),
                                                         np.mean(y[x[:, 1] == 0]),
                                                         np.mean(y[x[:, 1] == 1])))
    print("New data: didi = %.2f" % didi_r(x, y_new, pfeat))
    print("Tot: %.2f \t x2 = 0: %.2f \t x2 = 1: %.2f" % (np.mean(y_new),
                                                         np.mean(y_new[x[:, 1] == 0]),
                                                         np.mean(y_new[x[:, 1] == 1])))
else:
    print("No solution found")
 import numpy as np
 from abc import ABC, abstractmethod
+import utils

 class AbstractConstraint(ABC):
@@ -9,7 +10,7 @@ class AbstractConstraint(ABC):
         pass

-class InequalityGlobalConstraint(AbstractConstraint):
+class InequalityRegGlobalConstraint(AbstractConstraint):

     def __init__(self, name, sense, rhs):
         self.name = name
@@ -20,21 +21,113 @@ class InequalityGlobalConstraint(AbstractConstraint):
             self._checker = self.leq
         elif sense == '>=':
             self._checker = self.geq
+        elif sense == '>':
+            self._checker = self.gt
+        elif sense == '<':
+            self._checker = self.lt
         else:
             raise ValueError("Sense " + str(sense) + " not understood!")

     def leq(self, value):
         return value <= self.rhs

+    def lt(self, value):
+        return value < self.rhs
+
     def geq(self, value):
         return value >= self.rhs

+    def gt(self, value):
+        return value > self.rhs
+
     def is_satisfied(self, x, y):
         c = np.apply_along_axis(self._checker, axis=0, arr=y)
         return np.all(c)

+    def __str__(self):
+        return "InequalityRegGlobalConstraint: y " + str(self.sense) + " " + str(self.rhs)
+
+
+class InequalityClsGlobalConstraint(AbstractConstraint):
+
+    def __init__(self, name, sense, rhs):
+        self.name = name
+        self.sense = sense
+        self.rhs = rhs
+        if sense == '<=':
+            self._checker = self.leq
+        elif sense == '>=':
+            self._checker = self.geq
+        elif sense == '>':
+            self._checker = self.gt
+        elif sense == '<':
+            self._checker = self.lt
+        else:
+            raise ValueError("Sense " + str(sense) + " not understood!")
+
+    def leq(self, value):
+        return value <= self.rhs
+
+    def lt(self, value):
+        return value < self.rhs
+
+    def geq(self, value):
+        return value >= self.rhs
+
+    def gt(self, value):
+        return value > self.rhs
+
-class FairnessConstraint(AbstractConstraint):
-    # @todo
     def is_satisfied(self, x, y):
-        pass
+        # Check the bound on the predicted class indices; applying the checker
+        # to the one-hot matrix y itself would leave `classes` unused (reading
+        # the intent here is an assumption).
+        classes = np.argmax(y, axis=1)
+        c = np.apply_along_axis(self._checker, axis=0, arr=classes)
+        return np.all(c)

+    def __str__(self):
+        return "InequalityClsGlobalConstraint: y " + str(self.sense) + " " + str(self.rhs)
+
+
+class BalanceConstraint(AbstractConstraint):
+
+    def __init__(self, name, value):
+        self.name = name
+        self.value = value
+
+    def is_satisfied(self, x, y):
+        # No class may exceed its fair share of the examples by more than `value`.
+        lab, cnts = np.unique(y, return_counts=True)
+        n_points = len(y)
+        return np.max(cnts) <= int(np.ceil(n_points / len(lab))) + self.value
+
+    def __str__(self):
+        return "BalanceConstraint with value " + str(self.value)
+
+
+class FairnessRegConstraint(AbstractConstraint):
+
+    def __init__(self, name, pfeat, value):
+        self.name = name
+        self.pfeat = pfeat
+        self.value = value
+
+    def is_satisfied(self, x, y):
+        didi = utils.didi_r(x, y, self.pfeat)
+        return didi <= self.value
+
+    def __str__(self):
+        return "FairnessRegConstraint: didi <= " + str(self.value)
+
+
+class FairnessClsConstraint(AbstractConstraint):
+
+    def __init__(self, name, pfeat, value):
+        self.name = name
+        self.pfeat = pfeat
+        self.value = value
+
+    def is_satisfied(self, x, y):
+        didi = utils.didi_c(x, y, self.pfeat)
+        return didi <= self.value
+
+    def __str__(self):
+        return "FairnessClsConstraint: didi <= " + str(self.value)
@@ -15,7 +15,8 @@ from aiddl_network.grpc_function import GrpcFunction
 from aiddl_network.aiddl_grpc_server import AiddlServicer
 from aiddl_network.aiddl_grpc_server import LOADER_URI

-from moving_target_cplex import MovingTargetRegCplex
+from moving_target_cplex import MovingTargetRegCplex, MovingTargetClsCplex
+from moving_target_smt import MovingTargetRegSMT

 # Loaded modules (aka AIDDL files) go to container:
 C = Container()
@@ -98,7 +99,7 @@ f_LAP = GrpcFunction(host, port, lap_uri)

 # Finally, we can apply mean square error to data:
-weights = f_MSE.apply(example_data)
+# weights = f_MSE.apply(example_data)
 # print("Weights:", weights)
@@ -116,7 +117,9 @@ weights = f_MSE.apply(example_data)

 # F.add_function(ExpandData(), Symbolic("my.expander"))
-mtc = MovingTargetRegCplex(f_LAP, n=30)
+# mtc = MovingTargetRegCplex(f_LAP, n=30)
+# mtc = MovingTargetClsCplex(f_LAP, n=30)
+mtc = MovingTargetRegSMT(f_LAP, n=5)

 mt_data = C.get_entry(Symbolic("moving-targets-problem"), module=example_module_uri).get_value()
 mt_data = mt_data.resolve(C)
...
@@ -19,24 +19,24 @@ class MovingTarget(ABC):

     def apply(self, args):
         ml_problem = args[Symbolic("examples")]
         d = self.convert_data(args[Symbolic("examples")])  # d = (x_s, y_s)
-        L = args[Symbolic("loss-function")]
-        C = args[Symbolic("constraints")]
-        alpha = args[Symbolic("alpha")]
-        beta = args[Symbolic("beta")]
+        L = args[Symbolic("loss-function")].string_value()
+        C = self.convert_constraint(args[Symbolic("examples")], args[Symbolic("constraints")])
+        alpha = args[Symbolic("alpha")].real_value()
+        beta = args[Symbolic("beta")].real_value()

         # Initial step.
         M = self.initialize_ext(d)
-        self.add_constraints(M, C, d)
+        self.add_constraints(M, C, d[0], d[1])
         # self.set_loss_function(M, L, d)
         # d_data = self.get_pysmt_data(y_k)

         y_k = self.ML.apply(ml_problem)
+        y_k = np.array(y_k.unpack())

         for i in range(0, self.n):
-            self.y_k_history.append(np.array(y_k.unpack()))
-            sat_C = self.check_constraints_ext(M, C, (d[0], np.array(y_k.unpack())))
+            self.y_k_history.append(y_k)
+            sat_C = self.check_constraints_ext(M, C, (d[0], y_k))

             if not sat_C:
                 self.m_alpha(M, L, y_k, alpha)
@@ -46,11 +46,12 @@ class MovingTarget(ABC):
             z_k = self.solve_ext(M)
             ml_problem = self.assemble_ml_problem(ml_problem, z_k)
             y_k = self.ML.apply(ml_problem)
+            y_k = np.array(y_k.unpack())

         print(y_k)
         return y_k

     @abstractmethod
-    def add_constraints(self, M, C, y_s):
+    def add_constraints(self, M, C, x_s, y_s):
         """Add the converted constraint collection C to the master problem M."""

     @abstractmethod
@@ -94,7 +95,37 @@ class MovingTarget(ABC):
         x = np.array(x)
         y = np.array(y)
         return x, y
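For orientation, the loop that apply() implements is the moving-targets scheme: pretrain the learner, then alternate a master step that adjusts the targets with a learner step that refits on them. A minimal sketch, with hypothetical helper names standing in for the abstract hooks above:

    # y_k = np.array(ML.apply(ml_problem).unpack())      # pretrain on the raw targets
    # for i in range(n):
    #     if not check_constraints(M, C, (x_s, y_k)):
    #         m_alpha_step(M, L, y_k, alpha)             # pull the targets toward feasibility
    #     else:
    #         m_beta_step(M, L, y_k, beta)               # feasible: track the learner's output
    #     z_k = solve(M)                                 # adjusted targets from the master
    #     y_k = refit_and_predict(ml_problem, z_k)       # learner step on the adjusted targets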