Commit 715f2406 authored by Uwe Köckemann's avatar Uwe Köckemann
Browse files

Added normalization, one-hot encoding, and a scikit-learn wrapper

parent 98ab8c39
;; Test module for the attribute encoder: a tiny regression problem with one
;; numeric column (y) and one column of symbolic values (x).
;; Module header: presumably `#mod` names this module, `#nms` binds the EVAL
;; namespace and `#req` makes the supervised-learning module available as SL
;; (AIDDL directive semantics are not shown here -- confirm).
(#mod self org.aiddl.example.moving-target.enc-test)
(#nms EVAL org.aiddl.eval-ht)
(#req SL org.aiddl.common.learning.supervised)
;; Entry `enc-test-01`, tagged as RegressionProblem from SL.
(^RegressionProblem@SL enc-test-01
(
;; One (name type) pair per column, in the same order as the data rows.
;; NOTE(review): x is declared ^#real but every x value below is a symbol
;; (a, b, c) -- presumably intentional to exercise one-hot encoding; confirm.
attributes:[(y ^#real) (x ^#real)]
;; Name of the attribute that serves as the prediction target.
label:y
;; Rows are [y x].
data:[
[1.0 a]
[2.0 b]
[3.0 c]
]
))
......@@ -68,8 +68,10 @@
loss-function:MeanSquaredError ;; Select available loss function
constraints:{
(>= y -30) ;; Each element in set needs to be translated to PySMT
;;(fairness-1 y 1.0)
(didi-real y [x] 0.5)
(didi-bin y [x] 0.5)
(balance y 0.1)
}
alpha:1 ;; Parameter alpha
beta:1 ;; Parameter beta
))
\ No newline at end of file
))
\ No newline at end of file
This diff is collapsed.
......@@ -111,7 +111,7 @@ class MovingTarget(ABC):
if j == label_idx:
row.append(Real(y_k[i]))
else:
row.append(Real(data[i][j]))
row.append(data[i][j])
new_data.append(Tuple(row))
new_data = List(new_data)
problem = current.put(Symbolic('data'), new_data)
......
from aiddl_core.representation.numerical import Numerical
from aiddl_core.representation.symbolic import Symbolic
from aiddl_core.representation.integer import Integer
from aiddl_core.representation.real import Real
from aiddl_core.representation.infinity import Infinity
from aiddl_core.representation.list import List
from aiddl_core.representation.tuple import Tuple
from aiddl_core.function.function import Function
class OneHotEncoder(Function):
    """Encode every column of a supervised-learning problem term.

    Columns whose values are all ``Numerical`` are min-max normalized.
    Any column containing at least one non-numerical value is one-hot
    encoded over the set of distinct values found in that column.
    """

    @staticmethod
    def createNormalizer(o, d):
        """Return a function mapping ``x`` to the list ``[(o + x) / (d + o)]``.

        ``apply`` passes ``o = -min`` and ``d = max`` of a column, which
        turns the expression into ``(x - min) / (max - min)``.
        """
        return lambda x: [(o + x) / (d + o)]

    def apply(self, mlProblem):
        """Return a copy of ``mlProblem`` with encoded data.

        Reads the ``attributes`` and ``data`` entries of ``mlProblem``.
        The result has ``data`` replaced by the encoded rows and a new
        ``decoding`` entry with one element per original column:

        * a ``List`` of the column's distinct values, ordered by one-hot
          position, for one-hot encoded columns;
        * a ``Tuple`` ``(-min, max)`` for normalized columns.
        """
        atts = mlProblem[Symbolic("attributes")]
        data = mlProblem[Symbolic("data")]

        encoding = {}
        decoding = []
        for i in range(len(atts)):
            is_numerical = True
            lowest = Infinity.pos()
            highest = Infinity.neg()
            # A dict keeps first-seen order, so one-hot positions are
            # reproducible between runs (set order is not).
            domain = {}
            for row in data:
                x = row[i]
                is_numerical &= isinstance(x, Numerical)
                # Bounds are only tracked while the column still looks numeric.
                if is_numerical:
                    if lowest > x:
                        lowest = x
                    if highest < x:
                        highest = x
                domain.setdefault(x, None)

            if not is_numerical:
                categories = list(domain)
                size = len(categories)
                encoding_i = {}
                for pos, e in enumerate(categories):
                    # Emit Integer terms, not raw Python ints, so encoded
                    # rows contain terms only (same as normalized columns).
                    encoding_i[e] = [
                        Integer(1) if k == pos else Integer(0)
                        for k in range(size)
                    ]
                encoding[i] = encoding_i.get
                decoding.append(List(categories))
            else:
                offset = Integer(-1) * lowest
                if lowest == highest:
                    # Constant column: max - min is zero, so the normalizer
                    # would divide by zero. Map every value to 0 instead.
                    encoding[i] = lambda x: [Real(0.0)]
                else:
                    encoding[i] = OneHotEncoder.createNormalizer(offset, highest)
                decoding.append(Tuple([offset, highest]))

        enc_data = []
        for row in data:
            enc_row = []
            for i in range(len(row)):
                # Each encoder returns a list, so one-hot columns expand
                # into several output columns.
                enc_row += encoding[i](row[i])
            enc_data.append(List(enc_row))

        mlProblem = mlProblem.put(Symbolic("data"), List(enc_data))
        mlProblem = mlProblem.put(Symbolic("decoding"), List(decoding))
        return mlProblem
import numpy as np
import sklearn
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.exceptions import NotFittedError
from sklearn.naive_bayes import GaussianNB
from aiddl_core.parser.parser import parse_term
from aiddl_core.representation.symbolic import Symbolic
from aiddl_core.representation.integer import Integer
from aiddl_core.representation.list import List
from aiddl_core.function.function import Function
class RandomForestLearner(Function):
    """AIDDL function wrapping scikit-learn's ``RandomForestClassifier``."""

    def __init__(self):
        """Create the classifier with 50 estimators and a maximum depth of 5."""
        self.n_estimators = 50
        self.max_depth = 5
        self.model = RandomForestClassifier(n_estimators=self.n_estimators,
                                            max_depth=self.max_depth)

    def configure(self, cfg, freg):
        """Read ``n_estimators`` and ``max_depth`` from ``cfg`` and rebuild the model.

        Settings missing from ``cfg`` keep their current value. ``freg`` is
        not used.
        """
        self.n_estimators = cfg.get_or_default(
            Symbolic("n_estimators"), Integer(self.n_estimators)).int_value()
        # Store the depth in max_depth; it was previously written to
        # n_estimators, which discarded both settings.
        self.max_depth = cfg.get_or_default(
            Symbolic("max_depth"), Integer(self.max_depth)).int_value()
        self.model = RandomForestClassifier(n_estimators=self.n_estimators,
                                            max_depth=self.max_depth)

    def apply(self, mlProblem):
        """Fit the model on ``mlProblem`` and return predictions for its own rows.

        Returns a ``List`` with one term per data row, obtained by parsing
        the string form of each predicted value.
        """
        x, y = splitAiddlMlProblem(mlProblem)
        self.model.fit(x, y)
        y_p = self.model.predict(x)
        return List([parse_term(str(e)) for e in y_p])
# def fit(self, x, y):
# self.model.fit(x, y)
# def predict(self, x):
# return self.model.predict(x)
# def predict_proba(self, x):
# return self.model.predict_proba(x)
class GBTree(Function):
    """AIDDL function wrapping scikit-learn's ``GradientBoostingRegressor``."""

    def __init__(self):
        """Create the regressor with 50 estimators and ``min_samples_leaf`` of 5."""
        self.n_estimators = 50
        self.min_samples_leaf = 5
        self.model = GradientBoostingRegressor(n_estimators=self.n_estimators,
                                               min_samples_leaf=self.min_samples_leaf)

    def configure(self, cfg, freg):
        """Read ``n_estimators`` and ``min_samples_leaf`` from ``cfg`` and rebuild the model.

        Settings missing from ``cfg`` keep their current value. ``freg`` is
        not used.
        """
        self.n_estimators = cfg.get_or_default(
            Symbolic("n_estimators"), Integer(self.n_estimators)).int_value()
        # Store the leaf size in min_samples_leaf; it was previously written
        # to n_estimators, which discarded both settings.
        self.min_samples_leaf = cfg.get_or_default(
            Symbolic("min_samples_leaf"), Integer(self.min_samples_leaf)).int_value()
        self.model = GradientBoostingRegressor(n_estimators=self.n_estimators,
                                               min_samples_leaf=self.min_samples_leaf)

    def apply(self, mlProblem):
        """Fit the model on ``mlProblem`` and return predictions for its own rows.

        Returns a ``List`` with one term per data row, obtained by parsing
        the string form of each predicted value.
        """
        x, y = splitAiddlMlProblem(mlProblem)
        self.model.fit(x, y)
        y_p = self.model.predict(x)
        return List([parse_term(str(e)) for e in y_p])
def splitAiddlMlProblem(mlProblem):
    """Split an AIDDL learning problem into feature rows and label values.

    Reads the ``attributes``, ``label`` and ``data`` entries of
    ``mlProblem``. The label column is the first attribute whose first
    element equals ``label``.

    Returns a pair ``(x, y)``: ``x`` is a list of rows holding the unpacked
    non-label values, ``y`` is the list of unpacked label values. If no
    attribute matches ``label``, every column ends up in ``x`` and ``y``
    stays empty.
    """
    atts = mlProblem[Symbolic("attributes")]
    label = mlProblem[Symbolic("label")]
    data = mlProblem[Symbolic("data")]

    labelIdx = -1
    for i in range(len(atts)):
        if atts[i][0] == label:
            labelIdx = i
            break

    x = []
    y = []
    for row in data:
        x_row = []
        for i in range(len(row)):
            # unpack() turns the term into the plain value handed to
            # scikit-learn.
            value = row[i].unpack()
            if i == labelIdx:
                y.append(value)
            else:
                x_row.append(value)
        x.append(x_row)
    return x, y
from abc import ABC, abstractmethod
from aiddl_core.representation.symbolic import Symbolic
from aiddl_core.representation.integer import Integer
from aiddl_core.representation.infinity import Infinity
from aiddl_core.representation.tuple import Tuple
from aiddl_core.container.container import Container
import aiddl_core.function.default as dfun
from aiddl_core.tools.logger import Logger
import aiddl_core.parser.parser as parser
from aiddl_network.grpc_function import GrpcFunction
from aiddl_network.aiddl_grpc_server import AiddlServicer
from aiddl_network.aiddl_grpc_server import LOADER_URI
from preprocessing import OneHotEncoder
# Driver script: load the encoding test problem, run OneHotEncoder on it and
# print the problem before and after encoding.

# Loaded modules (aka AIDDL files) go to container:
C = Container()
# Loaded functions (#def in modules) go to the default function registry
# created for this container:
F = dfun.get_default_function_registry(C)
# Load example (returns URI of module). The path is relative, so it is
# presumably resolved against the current working directory -- run the script
# from its own folder.
example_module_uri = parser.parse("../../aiddl/encoding-test.aiddl", C, F)
# Fetch the "enc-test-01" entry from the loaded module:
example_entry = C.get_entry(Symbolic("enc-test-01"), module=example_module_uri)
# Take value of entry (the learning problem term):
example_data = example_entry.get_value()
encoder = OneHotEncoder()
# Show the problem as loaded ...
print(Logger.pretty_print(example_data, 0))
encoded_prob = encoder.apply(example_data)
# ... and after encoding.
print(Logger.pretty_print(encoded_prob, 0))
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment