Commit 9c2f9f47 authored by Fabrizio Detassis
Browse files

Small refactoring, new constraints and classification problem

parent 5cfac261
......@@ -65,12 +65,12 @@
(#tuple moving-targets-problem
(
examples:$examples ;; Regular supervised ML problem
loss-function:MeanSquaredError ;; Select available loss function
loss-function:MeanAbsoluteError ;; Select available loss function
constraints:{
(>= y -30) ;; Each element in set needs to be translated to PySMT
(didi-real y [x] 0.5)
(didi-bin y [x] 0.5)
(balance y 0.1)
;; (didi-real y [x] 0.5)
;; (didi-bin y [x] 0.5)
;; (balance y 0.1)
}
alpha:1 ;; Parameter alpha
beta:1 ;; Parameter beta
......
(#mod self org.aiddl.example.moving-target.test-01)
(#nms EVAL org.aiddl.eval-ht)
(#req SL org.aiddl.common.learning.supervised)
(^RegressionProblem@SL examples
(
attributes:[(y ^#real) (x1 ^#real) (x2 ^#real)]
label:y
data:[
(2 0.46876 0)
(1 0.68725 0)
(2 0.70889 0)
(2 0.24617 0)
(2 0.64563 0)
(2 0.20468 0)
(1 0.37330 0)
(1 0.58604 0)
(1 0.42809 0)
(0 0.05272 0)
(2 0.25810 0)
(1 0.15931 0)
(0 0.58373 0)
(0 0.15215 0)
(1 0.31580 0)
(1 0.48765 0)
(2 0.23271 0)
(0 0.33075 0)
(0 0.02454 0)
(0 0.27108 0)
(1 0.82266 0)
(0 0.65037 0)
(0 0.11161 0)
(2 0.50958 0)
(0 0.80177 0)
(2 0.28616 0)
(1 0.74214 0)
(0 0.93101 0)
(0 0.57490 0)
(2 0.28891 0)
(0 0.02450 0)
(0 0.29775 0)
(0 0.08457 0)
(1 0.57290 0)
(1 0.98779 0)
(0 0.21251 0)
(1 0.30324 0)
(0 0.37703 0)
(0 0.15422 0)
(2 0.90874 0)
(0 0.52846 0)
(0 0.03465 0)
(2 0.66185 0)
(1 0.18609 0)
(0 0.22061 0)
(2 0.07671 0)
(2 0.89159 0)
(2 0.34661 0)
(2 0.31532 0)
(2 0.97554 0)
(0 0.63249 1)
(1 0.73163 1)
(2 0.41826 1)
(2 0.35865 1)
(2 0.67698 1)
(1 0.01482 1)
(0 0.71497 1)
(2 0.57192 1)
(1 0.06882 1)
(1 0.47431 1)
(2 0.12842 1)
(0 0.38252 1)
(2 0.35838 1)
(1 0.14697 1)
(2 0.10600 1)
(2 0.27471 1)
(0 0.59134 1)
(2 0.36586 1)
(2 0.89406 1)
(2 0.38183 1)
(2 0.42676 1)
(1 0.27430 1)
(1 0.41646 1)
(2 0.65456 1)
(1 0.71556 1)
(2 0.97935 1)
(2 0.48944 1)
(1 0.09128 1)
(2 0.93509 1)
(2 0.80899 1)
(2 0.29443 1)
(2 0.75088 1)
(2 0.74210 1)
(2 0.54209 1)
(1 0.73187 1)
(2 0.15325 1)
(2 0.55892 1)
(2 0.18299 1)
(1 0.97745 1)
(2 0.85193 1)
(2 0.91503 1)
(1 0.69043 1)
(1 0.20993 1)
(2 0.79305 1)
(2 0.68840 1)
(2 0.87589 1)
(2 0.90116 1)
(2 0.30976 1)
(2 0.79235 1)
(1 0.65276 1)
]
))
(#tuple moving-targets-problem
(
examples:$examples ;; Regular supervised ML problem
loss-function:HammingDistance ;; Select available loss function
constraints:{
;; (>= y -30) ;; Each element in set needs to be translated to PySMT
;; (didi-real y [x2] 0.5)
;; (didi-bin y [x2] 0.5)
(balance y 0.1)
}
alpha:1 ;; Parameter alpha
beta:1 ;; Parameter beta
))
\ No newline at end of file
(#mod self org.aiddl.example.moving-target.test-01)
(#nms EVAL org.aiddl.eval-ht)
(#req SL org.aiddl.common.learning.supervised)
(^RegressionProblem@SL examples
(
attributes:[(y ^#real) (x1 ^#real) (x2 ^#real)]
label:y
data:[
(0.14399 0.93351 0)
(0.10569 0.57763 0)
(0.15547 0.20965 0)
(0.01678 0.36641 0)
(0.05720 0.67262 0)
(0.11661 0.97933 0)
(0.12481 0.91705 0)
(0.20830 0.58882 0)
(0.00994 0.46877 0)
(0.07117 0.31546 0)
(0.00000 0.88777 0)
(0.22190 0.18996 0)
(0.17124 0.20468 0)
(0.05489 0.86582 0)
(0.14873 0.49931 0)
(0.10313 0.50076 0)
(0.12818 0.14048 0)
(0.16510 0.21265 0)
(0.04335 0.14884 0)
(0.05335 0.43776 0)
(0.09383 0.35457 0)
(0.09007 0.01122 0)
(0.10815 0.98088 0)
(0.09873 0.99758 0)
(0.16206 0.81442 0)
(0.08034 0.84978 0)
(0.04674 0.95845 0)
(0.16197 0.05119 0)
(0.17001 0.54125 0)
(0.20784 0.02575 0)
(0.13035 0.01298 0)
(0.19193 0.22368 0)
(0.06557 0.39587 0)
(0.22974 0.46312 0)
(0.17473 0.01764 0)
(0.09336 0.47501 0)
(0.01593 0.99950 0)
(0.06919 0.15301 0)
(0.16508 0.29693 0)
(0.21398 0.68030 0)
(0.10951 0.11171 0)
(0.06320 0.33794 0)
(0.08470 0.75342 0)
(0.11798 0.04564 0)
(0.04152 0.84485 0)
(0.04467 0.38008 0)
(0.07386 0.00816 0)
(0.13453 0.88137 0)
(0.08867 0.15829 0)
(0.10146 0.96530 0)
(0.60087 0.58609 1)
(0.28802 0.80639 1)
(0.45752 0.34234 1)
(0.40707 0.20283 1)
(0.36236 0.62417 1)
(0.55087 0.69895 1)
(0.73721 0.68265 1)
(0.38855 0.04170 1)
(0.51397 0.57022 1)
(0.29755 0.98608 1)
(0.38471 0.47996 1)
(0.26678 0.77691 1)
(0.64121 0.78459 1)
(0.39326 0.76393 1)
(0.90136 0.15291 1)
(0.35579 0.51981 1)
(0.66407 0.49847 1)
(0.38942 0.19926 1)
(0.40315 0.85785 1)
(0.45099 0.16807 1)
(0.30725 0.20619 1)
(0.44424 0.96056 1)
(0.66612 0.87987 1)
(0.27660 0.81143 1)
(0.46951 0.96977 1)
(0.32858 0.52235 1)
(0.74554 0.22402 1)
(0.48748 0.90914 1)
(0.55029 0.86873 1)
(0.64465 0.19226 1)
(0.75841 0.44601 1)
(0.30840 0.04392 1)
(0.40345 0.90458 1)
(0.06413 0.60732 1)
(0.55159 0.12752 1)
(0.53627 0.48400 1)
(0.37135 0.60513 1)
(0.42941 0.65884 1)
(0.75241 0.74990 1)
(0.63155 0.87993 1)
(0.21681 0.10326 1)
(0.19754 0.25903 1)
(0.63268 0.73537 1)
(0.28253 0.35073 1)
(0.43952 0.96812 1)
(0.95463 0.01418 1)
(0.49971 0.49996 1)
(0.18556 0.20309 1)
(1.00000 0.23667 1)
(0.38108 0.75320 1)
]
))
(#tuple moving-targets-problem
(
examples:$examples ;; Regular supervised ML problem
loss-function:MeanSquaredError ;; Select available loss function
constraints:{
(>= y -30) ;; Each element in set needs to be translated to PySMT
(didi-real y [x2] 0.5)
;; (didi-bin y [x] 0.5)
;; (balance y 0.1)
}
alpha:1 ;; Parameter alpha
beta:1 ;; Parameter beta
))
\ No newline at end of file
......@@ -26,7 +26,7 @@ def didi_r(x, y, pfeat):
return tot
n_points = 10
n_points = 50
pfeat = [1]
x1 = np.hstack([np.random.rand(n_points, 1),
np.full(shape=(n_points, 1), fill_value=0)])
......@@ -35,7 +35,7 @@ x2 = np.hstack([np.random.rand(n_points, 1),
x = np.vstack([x1, x2])
y1 = np.random.normal(loc=0.0, scale=1.0, size=(n_points, 1))
y2 = np.random.normal(loc=3.0, scale=3.0, size=(n_points, 1))
y2 = np.random.normal(loc=5.0, scale=3.0, size=(n_points, 1))
y = np.vstack([y1, y2])
y = (y - y.min()) / (y.max() - y.min())
......
......@@ -15,7 +15,8 @@ from aiddl_network.grpc_function import GrpcFunction
from aiddl_network.aiddl_grpc_server import AiddlServicer
from aiddl_network.aiddl_grpc_server import LOADER_URI
from moving_target_cplex import MovingTargetRegCplex
from moving_target_cplex import MovingTargetRegCplex, MovingTargetClsCplex
from moving_target_smt import MovingTargetRegSMT
# Loaded modules (aka AIDDL files) go to container:
C = Container()
......@@ -116,7 +117,9 @@ f_LAP = GrpcFunction(host, port, lap_uri)
# F.add_function(ExpandData(), Symbolic("my.expander"))
mtc = MovingTargetRegCplex(f_LAP, n=30)
# mtc = MovingTargetRegCplex(f_LAP, n=30)
# mtc = MovingTargetClsCplex(f_LAP, n=30)
mtc = MovingTargetRegSMT(f_LAP, n=5)
mt_data = C.get_entry(Symbolic("moving-targets-problem"), module=example_module_uri).get_value()
mt_data = mt_data.resolve(C)
......
......@@ -19,12 +19,12 @@ class MovingTarget(ABC):
def apply(self, args):
ml_problem = args[Symbolic("examples")]
d = self.convert_data(args[Symbolic("examples")]) # d = (x_s, y_s)
L = args[Symbolic("loss-function")]
C = args[Symbolic("constraints")]
alpha = args[Symbolic("alpha")]
beta = args[Symbolic("beta")]
# initial step.
L = args[Symbolic("loss-function")].string_value()
C = self.convert_constraint(args[Symbolic("examples")], args[Symbolic("constraints")])
alpha = args[Symbolic("alpha")].real_value()
beta = args[Symbolic("beta")].real_value()
# initial step.
M = self.initialize_ext(d)
self.add_constraints(M, C, d[0], d[1])
......@@ -32,10 +32,11 @@ class MovingTarget(ABC):
# d_data = self.get_pysmt_data(y_k)
y_k = self.ML.apply(ml_problem)
y_k = np.array(y_k.unpack())
for i in range(0, self.n):
self.y_k_history.append(np.array(y_k.unpack()))
sat_C = self.check_constraints_ext(M, C, (d[0], np.array(y_k.unpack())))
self.y_k_history.append(y_k)
sat_C = self.check_constraints_ext(M, C, (d[0], y_k))
if not sat_C:
self.m_alpha(M, L, y_k, alpha)
......@@ -45,6 +46,7 @@ class MovingTarget(ABC):
z_k = self.solve_ext(M)
ml_problem = self.assemble_ml_problem(ml_problem, z_k)
y_k = self.ML.apply(ml_problem)
y_k = np.array(y_k.unpack())
print(y_k)
return y_k
......@@ -93,7 +95,37 @@ class MovingTarget(ABC):
x = np.array(x)
y = np.array(y)
return x, y
@staticmethod
def convert_constraint(current, constraints):
    """Translate AIDDL constraint terms into plain Python tuples.

    `current` is the supervised-learning problem term; its `attributes`
    entry lists (name, type) pairs and `label` names the target column.
    Fairness constraints ('didi-bin' / 'didi-real') carry a list of
    feature names, which are mapped to column indices over the non-label
    attributes; every other constraint is treated as a flat
    (type, variable, value) triple (e.g. inequalities like '>=').

    Returns a list of tuples consumable by the CPLEX/SMT back ends:
    either (ctype, var, feature_indices, value) or (ctype, var, value).
    """
    label = current[Symbolic('label')]
    attributes = current[Symbolic('attributes')]
    # Feature names in attribute order, skipping the label column, so
    # positions line up with the data matrix columns.
    features = [attr[0].string_value() for attr in attributes if attr[0] != label]
    print("attributes ", attributes)
    print("features ", features)
    converted = []
    for term in constraints:
        kind = term[0].string_value()
        if kind in ('didi-bin', 'didi-real'):
            # Resolve each protected-feature name to its column index.
            idx = [features.index(feat.string_value()) for feat in term[2]]
            converted.append((kind, term[1].string_value(), idx, term[3].real_value()))
        else:
            # Simple global constraint: (type, variable, numeric bound).
            converted.append((kind, term[1].string_value(), term[2].real_value()))
    print("Converted constraints: ", converted)
    return converted
def assemble_ml_problem(self, current, y_k):
"""Insert current label vector into machine learning problem."""
label = current[Symbolic('label')]
......
......@@ -31,11 +31,7 @@ class MovingTargetClsCplex(MovingTarget):
]
"""
for c in C:
# ctype = c[0].string_value()
# cvar = c[1].string_value()
# cval = c[2].real_value()
ctype, cvar, cval = c
pfeat = [1]
ctype = c[0]
# Translate the constraint to CPLEX language.
# todo: indexing of variables contraint <-> model
......@@ -47,14 +43,19 @@ class MovingTargetClsCplex(MovingTarget):
# Store the constraint in the class.
if ctype == 'balance':
cvar = c[1]
cval = c[2]
cstr = BalanceConstraint('ct', cval)
B = int(np.ceil(self.n_points / self.n_classes))
for c in range(self.n_classes):
xpr = M.sum([x[i][c] for i in range(self.n_points)])
M.add_constraint(xpr <= B + cval)
elif ctype == 'didi-cls':
cstr = FairnessClsConstraint('ct', [1], cval)
elif ctype == 'didi-bin':
cvar = c[1]
pfeat = c[2]
cval = c[3]
cstr = FairnessClsConstraint('ct', pfeat, cval)
constraint = .0
abs_val = M.continuous_var_list(keys=len(np.unique(x_s[:, pfeat])) * self.n_classes,
name='abs_val')
......@@ -91,12 +92,10 @@ class MovingTargetClsCplex(MovingTarget):
[
M.get_var_by_name("y_%s_%s" % (i, c)) for c in range(self.n_classes)
] for i in range(self.n_points)]
# y_k = np.array(y_k.unpack())
y_k = y_k.reshape(-1)
y_c = [self.inv_classes[y] for y in self.y]
yk_c = [self.inv_classes[y] for y in y_k]
# if L == Symbolic('HD') or L == Symbolic('HammingDistance'):
yk_c = [self.inv_classes[int(y)] for y in y_k]
if L == 'HD' or L == 'HammingDistance':
y_loss = (1 / self.n_points) * M.sum([(1 - x[i][y_c[i]]) for i in range(self.n_points)])
p_loss = (1 / self.n_points) * M.sum([(1 - x[i][yk_c[i]]) for i in range(self.n_points)])
......@@ -104,7 +103,6 @@ class MovingTargetClsCplex(MovingTarget):
else:
raise NotImplementedError("Loss function not recognized!")
# obj_func = y_loss + (1.0 / alpha.real_value()) * p_loss
obj_func = y_loss + (1.0 / alpha) * p_loss
M.minimize(obj_func)
......@@ -114,12 +112,10 @@ class MovingTargetClsCplex(MovingTarget):
[
M.get_var_by_name("y_%s_%s" % (i, c)) for c in range(self.n_classes)
] for i in range(self.n_points)]
# y_k = np.array(y_k.unpack())
y_k = y_k.reshape(-1)
y_c = [self.inv_classes[y] for y in self.y]
yk_c = [self.inv_classes[y] for y in y_k]
# if L == Symbolic('HD') or L == Symbolic('HammingDistance'):
yk_c = [self.inv_classes[int(y)] for y in y_k]
if L == 'HD' or L == 'HammingDistance':
y_loss = (1 / self.n_points) * M.sum([(1 - x[i][y_c[i]]) for i in range(self.n_points)])
p_loss = (1 / self.n_points) * M.sum([(1 - x[i][yk_c[i]]) for i in range(self.n_points)])
......@@ -128,7 +124,6 @@ class MovingTargetClsCplex(MovingTarget):
raise NotImplementedError("Loss function not recognized!")
obj_func = y_loss
# M.add(p_loss <= beta.real_value())
M.add(p_loss <= beta)
M.minimize(obj_func)
......@@ -226,6 +221,7 @@ class MovingTargetRegCplex(MovingTarget):
x = None
y = None
n_points = None
variables = list()
constraints = list()
......@@ -234,16 +230,12 @@ class MovingTargetRegCplex(MovingTarget):
Constraints are expressed with a list/set? of tuples, i.e.:
C = [
('>', y, -30),
('didi-real', y, [x], 0.2),
('didi-real', y, [x2], 0.2),
]
"""
for c in C:
print(c)
ctype = c[0].string_value()
cvar = c[1].string_value()
cval = c[2].real_value()
# ctype, cvar, cval = c
pfeat = [1]
ctype = c[0]
# Translate the constraint to CPLEX language.
# todo: indexing of variables contraint <-> model
......@@ -251,6 +243,8 @@ class MovingTargetRegCplex(MovingTarget):
# Store the constraint in the class.
if ctype in ('<=', '<', '>=', '>'):
cvar = c[1]
cval = c[2]
cstr = InequalityRegGlobalConstraint('ct', ctype, cval)
if ctype == '>=':
M.add_constraints([_x >= cval for _x in x])
......@@ -261,8 +255,11 @@ class MovingTargetRegCplex(MovingTarget):
elif ctype == '>':
M.add_constraints([_x > cval for _x in x])
elif ctype == 'didi-reg':
cstr = FairnessRegConstraint('ct', [1], cval)
elif ctype == 'didi-real':
cvar = c[1]
pfeat = c[2]
cval = c[3]
cstr = FairnessRegConstraint('ct', pfeat, cval)
constraint = .0
abs_val = M.continuous_var_list(keys=len(np.unique(x_s[:, pfeat])), name='abs_val')
......@@ -294,52 +291,38 @@ class MovingTargetRegCplex(MovingTarget):
def m_alpha(self, M, L, y_k, alpha):
n_points = len(y_k)
idx_var = [i for i in range(n_points)]
x = [M.get_var_by_name(s) for s in self.variables]
# y_k = np.array(y_k.unpack())
y_k = y_k.reshape(-1)
# if L == Symbolic('MSE') or L == Symbolic('MeanSquaredError'):
if L == 'MSE' or L == 'MeanSquaredError':
y_loss = (1.0 / n_points) * M.sum([(self.y[i] - x[i]) * (self.y[i] - x[i]) for i in idx_var])
p_loss = (1.0 / n_points) * M.sum([(y_k[i] - x[i]) * (y_k[i] - x[i]) for i in idx_var])
y_loss = (1.0 / self.n_points) * M.sum([(self.y[i] - x[i]) * (self.y[i] - x[i]) for i in range(self.n_points)])
p_loss = (1.0 / self.n_points) * M.sum([(y_k[i] - x[i]) * (y_k[i] - x[i]) for i in range(self.n_points)])
# elif L == Symbolic('MAE') or L == Symbolic('MeanAbsoluteError'):
elif L == 'MAE' or L == 'MeanAbsoluteError':
y_loss = (1.0 / n_points) * M.sum([M.abs(self.y[i] - x[i]) for i in idx_var])
p_loss = (1.0 / n_points) * M.sum([M.abs(y_k[i] - x[i]) for i in idx_var])
y_loss = (1.0 / self.n_points) * M.sum([M.abs(self.y[i] - x[i]) for i in range(self.n_points)])
p_loss = (1.0 / self.n_points) * M.sum([M.abs(y_k[i] - x[i]) for i in range(self.n_points)])
else:
raise NotImplementedError("Loss function not recognized!")
# obj_func = y_loss + (1.0 / alpha.real_value()) * p_loss
obj_func = y_loss + (1.0 / alpha) * p_loss
M.minimize(obj_func)
def m_beta(self, M, L, y_k, beta):
n_points = len(y_k)
idx_var = [i for i in range(n_points)]
x = [M.get_var_by_name(s) for s in self.variables]
# y_k = np.array(y_k.unpack())
y_k = y_k.reshape(-1)
# if L == Symbolic('MSE') or L == Symbolic('MeanSquaredError'):
if L == 'MSE' or L == 'MeanSquaredError':
y_loss = (1.0 / n_points) * M.sum([(self.y[i] - x[i]) * (self.y[i] - x[i]) for i in idx_var])
p_loss = (1.0 / n_points) * M.sum([(y_k[i] - x[i]) * (y_k[i] - x[i]) for i in idx_var])
y_loss = (1.0 / self.n_points) * M.sum([(self.y[i] - x[i]) * (self.y[i] - x[i]) for i in range(self.n_points)])
p_loss = (1.0 / self.n_points) * M.sum([(y_k[i] - x[i]) * (y_k[i] - x[i]) for i in range(self.n_points)])
# elif L == Symbolic('MAE') or L == Symbolic('MeanAbsoluteError'):
elif L == 'MAE' or L == 'MeanAbsoluteError':
y_loss = (1.0 / n_points) * M.sum([M.abs(self.y[i] - x[i]) for i in idx_var])
p_loss = (1.0 / n_points) * M.sum([M.abs(y_k[i] - x[i]) for i in idx_var])
y_loss = (1.0 / self.n_points) * M.sum([M.abs(self.y[i] - x[i]) for i in range(self.n_points)])
p_loss = (1.0 / self.n_points) * M.sum([M.abs(y_k[i] - x[i]) for i in range(self.n_points)])
else:
raise NotImplementedError("Loss function not recognized!")
obj_func = y_loss
# M.add(p_loss <= beta.real_value())
M.add(p_loss <= beta)
M.minimize(obj_func)
......@@ -379,9 +362,9 @@ class MovingTargetRegCplex(MovingTarget):
mod = CPModel(name)
# Variable declaration.
n_points = len(y)
idx_var = [i for i in range(n_points)]
mod.continuous_var_list(keys=idx_var, name='y')
self.n_points = len(y)
idx_var = [i for i in range(self.n_points)]
mod.continuous_var_list(keys=idx_var, lb=-1e3, ub=1e3, name='y')
# Store variable names.
self.variables = ['y_%d' %i for i in idx_var]
......@@ -403,7 +386,7 @@ class MovingTargetRegCplex(MovingTarget):
def test_fairness_reg():
n_points = 10
n_points = 30
pfeat = [1]
# Generation of synthetic data.
......@@ -540,7 +523,7 @@ def test_bal_cls():