Commit 60e74a47 authored by Uwe Köckemann's avatar Uwe Köckemann
Browse files

Merge branch 'master' of gitsvn-nt.oru.se:uwe.kockemann/moving-targets

parents 5b41f3fe 54c4fc34
FROM python:3.7-slim-buster
FROM python:3.7
WORKDIR /moving-target
......@@ -6,6 +6,7 @@ COPY requirements.txt requirements.txt
EXPOSE 8888
RUN pip3 install -r requirements.txt
RUN pysmt-install --msat --confirm-agreement
COPY . /moving-target
......
......@@ -72,7 +72,7 @@
;; (didi-bin y [x] 0.5)
;; (balance y 0.1)
}
type:classification
type:regression
alpha:1 ;; Parameter alpha
beta:1 ;; Parameter beta
))
\ No newline at end of file
......@@ -4,7 +4,7 @@
(#req SL org.aiddl.common.learning.supervised)
(^RegressionProblem@SL examples
(^ClassificationProblem@SL examples
(
attributes:[(y ^#real) (x1 ^#real) (x2 ^#real)]
label:y
......@@ -119,9 +119,10 @@
constraints:{
;; (>= y -30) ;; Each element in set needs to be translated to PySMT
;; (didi-real y [x2] 0.5)
;; (didi-bin y [x2] 0.5)
(balance y 0.1)
(didi-bin y [x2] 0.5)
;; (balance y 0.1)
}
type:classification
alpha:1 ;; Parameter alpha
beta:1 ;; Parameter beta
))
\ No newline at end of file
......@@ -115,13 +115,14 @@
(#tuple moving-targets-problem
(
examples:$examples ;; Regular supervised ML problem
loss-function:MeanSquaredError ;; Select available loss function
loss-function:MeanAbsoluteError ;; Select available loss function
constraints:{
(>= y -30) ;; Each element in set needs to be translated to PySMT
;; (>= y -30) ;; Each element in set needs to be translated to PySMT
(didi-real y [x2] 0.5)
;; (didi-bin y [x] 0.5)
;; (balance y 0.1)
}
type:regression
alpha:1 ;; Parameter alpha
beta:1 ;; Parameter beta
))
\ No newline at end of file
......@@ -15,8 +15,9 @@ from aiddl_network.grpc_function import GrpcFunction
from aiddl_network.aiddl_grpc_server import AiddlServicer
from aiddl_network.aiddl_grpc_server import LOADER_URI
from moving_target_cplex import MovingTargetRegCplex, MovingTargetClsCplex
from moving_target_smt import MovingTargetRegSMT
# from moving_target_cplex import MovingTargetRegCplex, MovingTargetClsCplex
# from moving_target_smt import MovingTargetRegSMT
from utils import get_problem
# Loaded modules (aka AIDDL files) go to container:
C = Container()
......@@ -26,7 +27,8 @@ C = Container()
F = dfun.get_default_function_registry(C)
# Load example (returns URI of module)
example_module_uri = parser.parse("../../aiddl/example-01.aiddl", C, F)
# example_module_uri = parser.parse("../../aiddl/example-01.aiddl", C, F)
example_module_uri = parser.parse("../../aiddl/example-fair-reg.aiddl", C, F)
# Fetch "examples" entry from module:
example_entry = C.get_entry(Symbolic("examples"), module=example_module_uri)
# Take value of entry:
......@@ -116,14 +118,17 @@ f_LAP = GrpcFunction(host, port, lap_uri)
# F.add_function(ExpandData(), Symbolic("my.expander"))
# mtc = MovingTargetRegCplex(f_LAP, n=30)
# mtc = MovingTargetClsCplex(f_LAP, n=30)
mtc = MovingTargetRegSMT(f_LAP, n=5)
mt_data = C.get_entry(Symbolic("moving-targets-problem"), module=example_module_uri).get_value()
mt_data = mt_data.resolve(C)
# mtc = MovingTargetRegCplex(f_LAP, n=30)
# mtc = MovingTargetClsCplex(f_LAP, n=10)
# mtc = MovingTargetRegSMT(f_LAP, n=5)
# Take value of entry:
problem_type = mt_data[Symbolic("type")].string_value()
movt_class = get_problem(problem_type, solver='smt')
mtc = movt_class(f_LAP, n=5)
# print(Logger.pretty_print(mt_data, 0))
mtc.apply(mt_data)
......@@ -26,7 +26,7 @@ class MovingTarget(ABC):
L = args[Symbolic("loss-function")].string_value()
C = MovingTarget.convert_constraint(args)
# initial step.
# Initial step.
M = self.initialize_ext(d)
self.add_constraints(M, C, d[0], d[1])
......
......@@ -268,7 +268,6 @@ class MovingTargetRegCplex(MovingTarget):
vals = np.unique(x_s[:, ix_feat])
for i, v in enumerate(vals):
mask = 1 * (x_s[:, ix_feat] == v).reshape(-1, 1)
yp = mask * self.y
Np = np.sum(mask)
# print("Np", Np)
# print("yp", yp)
......@@ -292,6 +291,7 @@ class MovingTargetRegCplex(MovingTarget):
def m_alpha(self, M, L, y_k, alpha):
x = [M.get_var_by_name(s) for s in self.variables]
# print("y_k", y_k)
if L == 'MSE' or L == 'MeanSquaredError':
y_loss = (1.0 / self.n_points) * M.sum([(self.y[i] - x[i]) * (self.y[i] - x[i]) for i in range(self.n_points)])
......@@ -383,228 +383,3 @@ class MovingTargetRegCplex(MovingTarget):
# self.logger.error("Model infeasible!")
mod.export_as_lp(path='./', basename='infeasible')
raise err
def test_fairness_reg():
n_points = 30
pfeat = [1]
# Generation of synthetic data.
x1 = np.hstack([np.random.rand(n_points, 1),
np.full(shape=(n_points, 1), fill_value=0)])
x2 = np.hstack([np.random.rand(n_points, 1),
np.full(shape=(n_points, 1), fill_value=1)])
x = np.vstack([x1, x2])
y1 = np.random.normal(loc=0.0, scale=1.0, size=(n_points, 1))
y2 = np.random.normal(loc=3.0, scale=3.0, size=(n_points, 1))
y = np.vstack([y1, y2])
y = (y - y.min()) / (y.max() - y.min())
d = x, y
# Loss function.
L = 'MAE'
didi_tr = utils.didi_r(x, y, pfeat)
# Costraints declaration.
C = [
# ("<=", 'y', 1.2),
# (">=", 'y', 0.3),
("didi-reg", ('y', pfeat), 2 * didi_tr),
]
# Params.
alpha = 1
beta = 1
print("Original data: ")
x, y = d
for i in range(len(x)):
print("%.2f \t %.2f" % (x[i, 0], y[i]))
m = MovingTargetRegCplex(None, n=1)
# CPLEX model initialization.
cplex_mod = m.initialize_ext(d)
# Add constraints.
m.add_constraints(cplex_mod, C, x, y)
# Check constraint satisfaction.
sat = m.check_constraints_ext(cplex_mod, C, d)
if not sat:
# Alpha step.
m.m_alpha(cplex_mod, L, y, alpha)
else:
# Beta step.
m.m_beta(cplex_mod, L, y, beta)
# Model solving.
y_new = m.solve_ext(cplex_mod)
print()
print("Final data: ")
print("(x1, x2) \t y \t y_new")
for i in range(len(x)):
print("(%.2f, %d) \t %.2f \t %.2f" % (x[i, 0], x[i, 1], y[i], y_new[i]))
print("Original data: didi = %.2f" % utils.didi_r(x, y, pfeat))
print("Tot: %.2f \t x2 = 0: %.2f \t x2 = 1: %.2f" % (np.mean(y),
np.mean(y[x[:, 1] == 0]),
np.mean(y[x[:, 1] == 1])))
print("New data: didi = %.2f" % utils.didi_r(x, y_new, pfeat))
print("Tot: %.2f \t x2 = 0: %.2f \t x2 = 1: %.2f" % (np.mean(y_new),
np.mean(y_new[x[:, 1] == 0]),
np.mean(y_new[x[:, 1] == 1])))
def test_bal_cls():
n_points = 15
# Generation of synthetic data.
x = np.random.rand(n_points, 1)
y = np.clip(np.random.poisson(lam=3, size=(n_points, 1)), a_min=0, a_max=2)
d = x, y
# Loss function.
L = 'HD'
# Constraints declaration.
C = [
# ("<=", 'y', 1.2),
# (">=", 'y', 0.3),
("balance", 'y', 0),
]
# Params.
alpha = 1
beta = 1
m = MovingTargetClsCplex(None, n=1)
# CPLEX model initialization.
cplex_mod = m.initialize_ext(d)
# Add constraints.
m.add_constraints(cplex_mod, C, x, y)
# Check constraint satisfaction.
sat = m.check_constraints_ext(cplex_mod, C, d)
if not sat:
# Alpha step.
m.m_alpha(cplex_mod, L, y, alpha)
else:
# Beta step.
m.m_beta(cplex_mod, L, y, beta)
# Model solving.
y_new = m.solve_ext(cplex_mod)
print()
print("Final data: ")
print("x \t y \t y_new")
for i in range(len(x)):
print("%.2f \t %.2f \t %.2f" % (x[i], y[i], y_new[i]))
print()
lab, cnts = np.unique(y, return_counts=True)
std = np.std(cnts / np.sum(cnts))
print("Initial balance: %.2f" % std)
lab, cnts = np.unique(y_new, return_counts=True)
std = np.std(cnts / np.sum(cnts))
print("Final balance: %.2f" % std)
def test_fairness_cls():
n_points = 50
pfeat = [1]
# Generation of synthetic data.
x1 = np.hstack([np.random.rand(n_points, 1),
np.full(shape=(n_points, 1), fill_value=0)])
x2 = np.hstack([np.random.rand(n_points, 1),
np.full(shape=(n_points, 1), fill_value=1)])
x = np.vstack([x1, x2])
y1 = np.random.poisson(lam=1, size=(n_points, 1))
y2 = np.random.poisson(lam=3, size=(n_points, 1))
y1 = np.clip(y1, a_min=0, a_max=2)
y2 = np.clip(y2, a_min=0, a_max=2)
y = np.vstack([y1, y2])
d = x, y
# Loss function.
L = 'HD'
didi_tr = utils.didi_c(x, y, pfeat)
# Costraints declaration.
C = [
# ("<=", 'y', 1.2),
# (">=", 'y', 0.3),
("didi-bin", 'y', pfeat, 0.5 * didi_tr),
]
# Params.
alpha = 1
beta = 1
print("Original data: ")
x, y = d
for i in range(len(x)):
print("%.2f \t %.2f" % (x[i, 0], y[i]))
m = MovingTargetClsCplex(None, n=1)
# CPLEX model initialization.
cplex_mod = m.initialize_ext(d)
# Add constraints.
m.add_constraints(cplex_mod, C, x, y)
# Check constraint satisfaction.
sat = m.check_constraints_ext(cplex_mod, C, d)
if not sat:
# Alpha step.
m.m_alpha(cplex_mod, L, y, alpha)
else:
# Beta step.
m.m_beta(cplex_mod, L, y, beta)
# Model solving.
y_new = m.solve_ext(cplex_mod)
print()
print("Final data: ")
print("(x1, x2) \t y \t y_new")
for i in range(len(x)):
print("(%.2f, %d) \t %.2f \t %.2f" % (x[i, 0], x[i, 1], y[i], y_new[i]))
print("Original data: didi = %.2f" % utils.didi_c(x, y, pfeat))
print("Tot: %.2f \t x2 = 0: %.2f \t x2 = 1: %.2f" % (np.mean(y),
np.mean(y[x[:, 1] == 0]),
np.mean(y[x[:, 1] == 1])))
print("New data: didi = %.2f" % utils.didi_c(x, y_new, pfeat))
print("Tot: %.2f \t x2 = 0: %.2f \t x2 = 1: %.2f" % (np.mean(y_new),
np.mean(y_new[x[:, 1] == 0]),
np.mean(y_new[x[:, 1] == 1])))
print("Data: ")
print("(y, x1, x2)")
for i in range(len(x)):
print("(%d, %.5f, %d)" % (y[i], x[i, 0], x[i, 1]))
if __name__ == '__main__':
# test_fairness_reg()
# test_bal_cls()
test_fairness_cls()
......@@ -3,11 +3,48 @@ import numpy as np
from moving_target_abc import MovingTarget
from pysmt.shortcuts import Symbol, LE, GE, LT, GT, Int, And, Equals, Plus, Minus, Div, Times, Max, is_sat, get_model, \
Real, Solver, Ite
from pysmt.typing import REAL
from constraint import InequalityRegGlobalConstraint
from pysmt.typing import REAL, INT
from constraint import InequalityRegGlobalConstraint, BalanceConstraint
from constraint import FairnessRegConstraint, FairnessClsConstraint
import utils
import signal
from contextlib import contextmanager
import _thread
import threading
class TimeoutException(Exception):
pass
@contextmanager
def time_limit2(seconds, msg=''):
timer = threading.Timer(seconds, lambda: _thread.interrupt_main())
timer.start()
try:
yield
except KeyboardInterrupt:
raise TimeoutException("Timed out for operation {}".format(msg))
finally:
# if the action ends in specified time, timer is canceled
timer.cancel()
@contextmanager
def time_limit(seconds):
def signal_handler(signum, frame):
raise TimeoutException("Timed out!")
signal.signal(signal.SIGALRM, signal_handler)
signal.alarm(seconds)
try:
yield
finally:
signal.alarm(0)
class SMTModel:
_TIME_LIMIT = 5
def __init__(self, name):
self.name = name
......@@ -25,7 +62,7 @@ class SMTModel:
for c in constraints:
self.add_constraint(c)
def optimize(self, alpha=0.2, max_iter=20):
def optimize(self, alpha=0.05, max_iter=20):
"""
Heuristic search for the optimal value of the SMT.
We proceed as follows:
......@@ -37,38 +74,52 @@ class SMTModel:
it = 0
domain = self.domain
loss = self.loss
unsat_counts = 0
print("Optimizing SMT Model")
sat = False
while it < max_iter:
while it < max_iter and unsat_counts < 3:
model = get_model(domain)
print("Solving problem")
current_loss = model.get_py_value(loss)
print("It: %d; current loss: %.2f" % (it, current_loss))
if it == 0:
ub = current_loss
lb = (1-alpha) * current_loss
ub = current_loss
lb = (1-alpha) * ub
print("Bounds: [%.2f - %.2f]" % (lb, ub))
if is_sat(And(domain, LT(loss, Real(lb)))):
if np.abs(ub - lb) > 0.01:
it += 1
else:
break
# try:
# with time_limit2(self._TIME_LIMIT):
# sat = is_sat(And(self.domain, LT(loss, Real(lb))))
# except TimeoutException as e:
# print("Timed out!")
# sat = False
# except Exception as e:
# raise e
sat = is_sat(And(self.domain, LT(loss, Real(lb))))
if sat:
print("SAT!")
domain = And(domain, LT(loss, Real(lb)))
ub = lb
lb = (1 - alpha) * ub
domain = And(self.domain, LT(loss, Real(lb)))
else:
print("UNSAT!")
ub = current_loss
alpha = alpha / 2
lb = (1-alpha) * ub
unsat_counts += 1
if np.abs(ub-lb) > 0.01:
it += 1
else:
break
model = get_model(domain)
self.domain = domain
model = get_model(self.domain)
y_opt = [float(model.get_py_value(y)) for y in self.variables]
# Distinguish between regression and classification models.
try:
y_opt = [float(model.get_py_value(y)) for y in self.variables]
except TypeError:
y_opt = [
[float(model.get_py_value(_y)) for _y in y] for y in self.variables
]
return y_opt
......@@ -77,15 +128,11 @@ class MovingTargetRegSMT(MovingTarget):
x = None
y = None
n_points = None
variables = list()
constraints = list()
""" REMOVED FOR NOW
def set_loss_function(self, M, lf):
pass
"""
def add_constraints(self, M: SMTModel, C, y_s):
def add_constraints(self, M: SMTModel, C, x_s, y_s):
"""
Constraints are expressed with a list/set? of tuples, i.e.:
C = [
......@@ -94,26 +141,65 @@ class MovingTargetRegSMT(MovingTarget):
]
"""
for c in C:
ctype, cvar, cval = c
# Store the constraint in the class.
cstr = InequalityRegGlobalConstraint('ct', ctype, cval)
self.constraints.append(cstr)
print(c)
ctype = c[0]
# Translate the constraint to SMT language.
# todo: indexing of variables contraint <-> model
x = M.variables
if ctype == '>=':
M.add_constraints([GE(_x, Real(cval)) for _x in x])
elif ctype == '<=':
M.add_constraints([LE(_x, Real(cval)) for _x in x])
elif ctype == '<':
M.add_constraints([LT(_x, Real(cval)) for _x in x])
elif ctype == '>':
M.add_constraints([GT(_x, Real(cval)) for _x in x])
if ctype in ('<=', '<', '>=', '>'):
# Store the constraint in the class.
cvar = c[1]
cval = c[2]
cstr = InequalityRegGlobalConstraint('ct', ctype, cval)
if ctype == '>=':
M.add_constraints([GE(_x, Real(cval)) for _x in x])
elif ctype == '<=':
M.add_constraints([LE(_x, Real(cval)) for _x in x])
elif ctype == '<':
M.add_constraints([LT(_x, Real(cval)) for _x in x])
elif ctype == '>':
M.add_constraints([GT(_x, Real(cval)) for _x in x])
elif ctype == 'didi-real':
cvar = c[1]
pfeat = c[2]
cval = c[3]
cstr = FairnessRegConstraint('ct', pfeat, cval)
constraint = .0
abs_val = [Symbol("y_%d" % i, REAL) for i in range(len(np.unique(x_s[:, pfeat])))]
# Add fairness constraint.
for ix_feat in pfeat:
vals = np.unique(x_s[:, ix_feat])
for i, v in enumerate(vals):
mask = 1 * (x_s[:, ix_feat] == v).reshape(-1, 1)
yp = mask * self.y
Np = np.sum(mask)
# print("mask", mask)
# print("Np", Np)
# print("yp", yp)
if Np > 0:
tmp1 = (1.0 / self.n_points) * Plus(x)
# tmp2 = Times(Div(Real(1.0), Real(float(Np))), Plus([Times(Int(int(mask[j][0])), x[j])
# for j in range(self.n_points)]))
tmp2 = Div(Real(1), Real(float(Np))) * Plus([mask[j][0] * x[j]
for j in range(self.n_points)])
# tmp2 = (1.0 / Np) * Plus([Ite(Real(float(mask[j][0])) > 0, x[j], Int(0))
# for j in range(self.n_points)])
# Linearization of the absolute value.
M.add_constraint(GE(abs_val[i], tmp1 - tmp2))
M.add_constraint(GE(abs_val[i], tmp2 - tmp1))
constraint += Plus(abs_val)
M.add_constraint(LE(constraint, Real(float(cval))))
else:
raise NotImplementedError("Constraint type not recognized " + str(ctype))
self.constraints.append(cstr)
print("Constraint added: " + str(