Commit 5b41f3fe authored by Uwe Köckemann's avatar Uwe Köckemann
Browse files

Split representation, added accuracy metric

parent 9b5100e0
This diff is collapsed.
This diff is collapsed.
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from abc import ABC, abstractmethod\n",
"\n",
"import random as r\n",
"\n",
"import matplotlib.pyplot as plt\n",
"plt.style.use('seaborn-whitegrid')\n",
"import numpy as np\n",
"\n",
"from aiddl_core.representation.symbolic import Symbolic\n",
"from aiddl_core.representation.integer import Integer\n",
"from aiddl_core.representation.real import Real\n",
"from aiddl_core.representation.infinity import Infinity\n",
"from aiddl_core.representation.tuple import Tuple\n",
"from aiddl_core.representation.list import List\n",
"from aiddl_core.container.container import Container\n",
"import aiddl_core.function.default as dfun\n",
"from aiddl_core.tools.logger import Logger\n",
"from aiddl_core.parser.parser import parse_term\n",
"\n",
"from aiddl_network.grpc_function import GrpcFunction\n",
"from aiddl_network.aiddl_grpc_server import AiddlServicer\n",
"from aiddl_network.aiddl_grpc_server import LOADER_URI\n",
"\n",
"import os\n",
"import sys\n",
"module_path = os.path.abspath(os.path.join('../python/moving_target'))\n",
"if module_path not in sys.path:\n",
" sys.path.append(module_path)\n",
"\n",
"from moving_target_cplex import MovingTargetRegCplex\n",
"from moving_target_cplex import MovingTargetClsCplex\n",
"from preprocessing import OneHotEncoder\n",
"from tools import CsvLoader\n",
"from utils import didi_c\n",
"from scikit_learn_wrapper import GBTree\n",
"from scikit_learn_wrapper import SciKitLearnFunction\n",
"from scikit_learn_wrapper import splitAiddlMlProblem"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Setup\n",
"\n",
"Create container and function registry and load some example data from a local file."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"# Container plus the default function registry used by the cells below.\n",
"C = Container()\n",
"F = dfun.get_default_function_registry(C)\n",
"\n",
"# Each loader argument is an AIDDL tuple of three strings; they look like\n",
"# (csv path, separator, label column) -- confirm against CsvLoader.\n",
"loader = CsvLoader()\n",
"test_term = parse_term('(\"../resources/whitewine_test.csv\" \",\" \"quality\")')\n",
"train_term = parse_term('(\"../resources/whitewine_train.csv\" \",\" \"quality\")')\n",
"test_data = loader.apply(test_term)\n",
"train_data = loader.apply(train_term)\n",
"\n",
"# Peek at the test set: label, attribute list and the first data row.\n",
"label_term = test_data[Symbolic(\"label\")]\n",
"attribute_terms = test_data[Symbolic(\"attributes\")]\n",
"first_row = test_data[Symbolic(\"data\")][0]\n",
"print(\"Label:\", label_term)\n",
"print(\"Attributes:\", Logger.pretty_print(attribute_terms, 1))\n",
"print(\"Data (first row):\", Logger.pretty_print(first_row, 1))\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Baseline Learner\n",
"\n",
"Register a random forest classifier as `mt.learner`, fit it on the training data, and plot the distribution of the quality labels it predicts for the training set."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Learner configuration handed to the scikit-learn wrapper.\n",
"learner = parse_term('''{\n",
" py_module:sklearn.ensemble\n",
" py_class:RandomForestClassifier\n",
" n_estimators:50\n",
" max_depth:5\n",
"}''')\n",
"\n",
"\n",
"# Register the learner so the moving-target configuration can refer to it by URI.\n",
"f_ML = SciKitLearnFunction(learner)\n",
"F.add_function(Symbolic(\"mt.learner\"), f_ML)\n",
"\n",
"x, y = splitAiddlMlProblem(train_data)\n",
"x_test, y_test = splitAiddlMlProblem(test_data)\n",
"\n",
"# Predictions of the unconstrained learner on the data it was fitted on.\n",
"y_0 = f_ML.apply(train_data)\n",
"\n",
"labels, counts = np.unique(y_0.unpack(), return_counts=True)\n",
"plt.title('White Wine Quality Predictions (Training Data)')\n",
"# Trailing semicolon keeps the BarContainer repr out of the cell output.\n",
"plt.bar(labels, counts, align='center');"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Moving Targets on Example"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"mt_cfg = parse_term('''(\n",
" n:3 ;; Number of iterations\n",
" alpha:1 ;; Parameter alpha\n",
" beta:1 ;; Parameter beta\n",
" problem-type:classification ;; classification/regression?\n",
" constraint-solver:cplex ;; Select constraint solver (cplex/pysmt)\n",
" learner:mt.learner ;; URI of learner (registered above)\n",
")''')\n",
"\n",
"mt_data = parse_term('''{\n",
" loss-function:HammingDistance ;; Select loss function\n",
" constraints:{\n",
" (balance \"quality\" 0.01)\n",
" ;;(didi-bin \"quality\" [\"alcohol\"] 0.01)\n",
" }\n",
"}''')\n",
"\n",
"\n",
"# Pick the moving-target implementation. Only the CPLEX classes are imported\n",
"# in this notebook, so any other solver is rejected here instead of leaving\n",
"# f_MT unbound and failing later with a NameError.\n",
"if mt_cfg.get(Symbolic(\"constraint-solver\")) == Symbolic(\"cplex\"):\n",
"    if mt_cfg.get(Symbolic(\"problem-type\")) == Symbolic(\"classification\"):\n",
"        f_MT = MovingTargetClsCplex(mt_cfg, F, test_data=x_test)\n",
"    else:\n",
"        f_MT = MovingTargetRegCplex(mt_cfg, F, test_data=x_test)\n",
"else:\n",
"    raise ValueError(\n",
"        \"Unsupported constraint-solver: %s\" % mt_cfg.get(Symbolic(\"constraint-solver\"))\n",
"    )\n",
"\n",
"# Merge the training problem into the loss/constraint description.\n",
"mt_data = mt_data.put_all(train_data)\n",
"\n",
"y_k = f_MT.apply(mt_data)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# One bar chart per recorded iteration of the training-set predictions.\n",
"# The loop variable is deliberately not called y_k, so the result returned by\n",
"# f_MT.apply in the previous cell is not overwritten.\n",
"for i, y_k_step in enumerate(f_MT.y_k_history):\n",
"    plt.figure()\n",
"    labels, counts = np.unique(y_k_step, return_counts=True)\n",
"    plt.title('White Wine Quality Predictions (iteration %d)' % i)\n",
"    plt.bar(labels, counts, align='center')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# One bar chart per recorded iteration of the test-set predictions. The title\n",
"# names the test data so these figures can be told apart from the training ones.\n",
"# The loop variable is not called y_k, so the result of f_MT.apply is kept.\n",
"for i, y_k_test_step in enumerate(f_MT.y_k_test_history):\n",
"    plt.figure()\n",
"    labels, counts = np.unique(y_k_test_step, return_counts=True)\n",
"    plt.title('White Wine Quality Predictions (test data, iteration %d)' % i)\n",
"    plt.bar(labels, counts, align='center')\n",
"\n",
"# DIDI value of the test-set predictions for every recorded iteration.\n",
"# NOTE(review): [11] is presumably the column index of the protected attribute\n",
"# in x_test -- confirm against didi_c and the CSV layout.\n",
"for i, y_k_test_step in enumerate(f_MT.y_k_test_history):\n",
"    didi_k = didi_c(x_test, y_k_test_step, [11])\n",
"    print(\"didi_%d = %f\" % (i, didi_k))\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "myenv",
"language": "python",
"name": "myenv"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.0"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": false,
"sideBar": false,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": false,
"toc_window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 4
}
......@@ -11,18 +11,20 @@ from aiddl_core.representation.tuple import Tuple
class MovingTarget(ABC):
def __init__(self, ML, n=10):
self.n = n
self.ML = ML
def __init__(self, cfg, freg, test_data=None):
self.n = cfg[Symbolic("n")].unpack()
self.ML = freg.get_function(cfg[Symbolic("learner")])
self.alpha = cfg[Symbolic("alpha")].real_value()
self.beta = cfg[Symbolic("beta")].real_value()
self.test_data = test_data
self.y_k_history = []
self.y_k_test_history = []
def apply(self, args):
ml_problem = args[Symbolic("examples")]
d = self.convert_data(args[Symbolic("examples")]) # d = (x_s, y_s)
ml_problem = args
d = self.convert_data(args) # d = (x_s, y_s)
L = args[Symbolic("loss-function")].string_value()
C = self.convert_constraint(args[Symbolic("examples")], args[Symbolic("constraints")])
alpha = args[Symbolic("alpha")].real_value()
beta = args[Symbolic("beta")].real_value()
C = MovingTarget.convert_constraint(args)
# initial step.
M = self.initialize_ext(d)
......@@ -36,18 +38,21 @@ class MovingTarget(ABC):
for i in range(0, self.n):
self.y_k_history.append(y_k)
if self.test_data is not None:
self.y_k_test_history.append(self.ML.predict(self.test_data))
sat_C = self.check_constraints_ext(M, C, (d[0], y_k))
if not sat_C:
self.m_alpha(M, L, y_k, alpha)
self.m_alpha(M, L, y_k, self.alpha)
else:
self.m_beta(M, L, y_k, beta)
print("-------------------------")
self.m_beta(M, L, y_k, self.beta)
z_k = self.solve_ext(M)
ml_problem = self.assemble_ml_problem(ml_problem, z_k)
y_k = self.ML.apply(ml_problem)
y_k = np.array(y_k.unpack())
print(y_k)
return y_k
@abstractmethod
......@@ -97,9 +102,11 @@ class MovingTarget(ABC):
return x, y
@staticmethod
def convert_constraint(current, constraints):
def convert_constraint(current):
label = current[Symbolic('label')]
attributes = current[Symbolic('attributes')]
constraints = current[Symbolic('constraints')]
features = [att[0].string_value() for att in attributes if att[0] != label]
print("attributes ", attributes)
print("features ", features)
......
......@@ -274,7 +274,7 @@ class MovingTargetRegCplex(MovingTarget):
# print("yp", yp)
if Np > 0:
tmp = (1.0 / self.n_points) * M.sum(x) - \
(1.0 / Np) * M.sum([yp[j][0] * x[j] for j in range(self.n_points)])
(1.0 / Np) * M.sum([mask[j][0] * x[j] for j in range(self.n_points)])
# Linearization of the absolute value.
M.add_constraint(abs_val[i] >= tmp)
M.add_constraint(abs_val[i] >= -tmp)
......
......@@ -33,7 +33,7 @@ class SciKitLearnFunction(Function):
def apply(self, mlProblem):
x, y = splitAiddlMlProblem(mlProblem)
print(y)
# print(y)
self.model.fit(x, y)
y_p = self.model.predict(x)
r = []
......@@ -41,6 +41,15 @@ class SciKitLearnFunction(Function):
r.append(parse_term(str(e)))
return List(r)
def fit(self, x, y):
self.model.fit(x, y)
def predict(self, x):
return self.model.predict(x)
def predict_proba(self, x):
return self.model.predict_proba(x)
class RandomForestLearner(Function):
def __init__(self):
self.n_estimators = 50
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment