Commit 266b200b authored by Baptiste Bauvin's avatar Baptiste Bauvin

Added FatSCMLateFusion and reworked execmonoview slightly

parent edf3f9db
@@ -23,88 +23,12 @@ __status__ = "Prototype"  # Production, Development, Prototype
# __date__ = 2016 - 03 - 25
def ExecMonoview_multicore(directory, name, labelsNames, classificationIndices, KFolds, datasetFileIndex, databaseType,
                           path, randomState, labels, hyperParamSearch="randomizedSearch",
                           metrics=[["accuracy_score", None]], nIter=30, **args):
    DATASET = h5py.File(path + name + str(datasetFileIndex) + ".hdf5", "r")
    # kwargs = args["args"]
    # views = [DATASET.get("View" + str(viewIndex)).attrs["name"] for viewIndex in
    #          range(DATASET.get("Metadata").attrs["nbView"])]
    neededViewIndex = args["viewIndex"]
    X = DATASET.get("View" + str(neededViewIndex))
    Y = labels
@@ -186,6 +110,82 @@ def ExecMonoview(directory, X, Y, name, labelsNames, classificationIndices, KFol
    return viewIndex, [CL_type, cl_desc + [feat], metricsScores, full_labels_pred, clKWARGS, y_test_multiclass_pred]
def initConstants(args, X, classificationIndices, labelsNames, name, directory):
    try:
        kwargs = args["args"]
    except KeyError:
        kwargs = args
    t_start = time.time()
    if isinstance(X.attrs["name"], bytes):
        feat = X.attrs["name"].decode("utf-8")
    else:
        feat = X.attrs["name"]
    CL_type = kwargs["CL_type"]
    X = getValue(X)
    # Fraction of the examples assigned to the training split.
    learningRate = float(len(classificationIndices[0])) / (len(classificationIndices[0]) + len(classificationIndices[1]))
    labelsString = "-".join(labelsNames)
    CL_type_string = CL_type
    outputFileName = directory + CL_type_string + "/" + feat + "/" + "Results-" + CL_type_string + "-" + labelsString + \
                     '-learnRate' + str(learningRate) + '-' + name + "-" + feat + "-"
    if not os.path.exists(os.path.dirname(outputFileName)):
        try:
            os.makedirs(os.path.dirname(outputFileName))
        except OSError as exc:
            # The directory may have been created concurrently by another process.
            if exc.errno != errno.EEXIST:
                raise
    return kwargs, t_start, feat, CL_type, X, learningRate, labelsString, outputFileName
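# Sketch of the learningRate term embedded in outputFileName (hypothetical
# index lists): it is the fraction of examples assigned to the training split.
trainIndices, testIndices = list(range(80)), list(range(80, 100))
learningRate = float(len(trainIndices)) / (len(trainIndices) + len(testIndices))
# learningRate == 0.8, which appears in the file-name prefix as "learnRate0.8".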
def initTrainTest(X, Y, classificationIndices):
    trainIndices, testIndices, testIndicesMulticlass = classificationIndices
    X_train = extractSubset(X, trainIndices)
    X_test = extractSubset(X, testIndices)
    if testIndicesMulticlass != []:
        X_test_multiclass = extractSubset(X, testIndicesMulticlass)
    else:
        X_test_multiclass = []
    y_train = Y[trainIndices]
    y_test = Y[testIndices]
    return X_train, y_train, X_test, y_test, X_test_multiclass
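# Minimal usage sketch with hypothetical data. extractSubset is defined
# elsewhere in the project; here we assume it amounts to plain row indexing.
import numpy as np

def extractSubset(matrix, usedIndices):  # hypothetical stand-in
    return matrix[usedIndices, :]

X = np.arange(20).reshape(10, 2)                  # 10 examples, 2 features
Y = np.array([0, 1] * 5)
classificationIndices = ([0, 1, 2, 3, 4, 5], [6, 7, 8, 9], [])
X_train, y_train, X_test, y_test, X_test_multiclass = initTrainTest(X, Y, classificationIndices)
# X_train.shape == (6, 2), X_test.shape == (4, 2), X_test_multiclass == []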
def getHPs(classifierModule, hyperParamSearch, nIter, CL_type, X_train, y_train, randomState,
           outputFileName, KFolds, nbCores, metrics, kwargs):
    if hyperParamSearch != "None":
        logging.debug("Start:\t " + hyperParamSearch + " best settings with " + str(nIter) + " iterations for " + CL_type)
        classifierHPSearch = getattr(classifierModule, hyperParamSearch)
        cl_desc = classifierHPSearch(X_train, y_train, randomState, outputFileName, KFolds=KFolds, nbCores=nbCores,
                                     metric=metrics[0], nIter=nIter)
        clKWARGS = dict((str(index), desc) for index, desc in enumerate(cl_desc))
        logging.debug("Done:\t " + hyperParamSearch + " best settings")
    else:
        clKWARGS = kwargs[CL_type + "KWARGS"]
    return clKWARGS
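# Shape sketch of the returned clKWARGS after a search (hypothetical values):
# each hyper-parameter found in cl_desc is keyed by its position, as a string.
cl_desc = [0.5, 12, "conjunction"]
clKWARGS = dict((str(index), desc) for index, desc in enumerate(cl_desc))
# clKWARGS == {"0": 0.5, "1": 12, "2": "conjunction"}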
def saveResults(stringAnalysis, outputFileName, full_labels_pred, y_train_pred, y_train, imagesAnalysis):
    logging.info(stringAnalysis)
    with open(outputFileName + '.txt', 'w') as outputTextFile:
        outputTextFile.write(stringAnalysis)
    np.savetxt(outputFileName + "full_pred.csv", full_labels_pred.astype(np.int16), delimiter=",")
    np.savetxt(outputFileName + "train_pred.csv", y_train_pred.astype(np.int16), delimiter=",")
    np.savetxt(outputFileName + "train_labels.csv", y_train.astype(np.int16), delimiter=",")
    if imagesAnalysis is not None:
        for imageName in imagesAnalysis:
            if os.path.isfile(outputFileName + imageName + ".png"):
                # The base name is taken: save under the first free numbered
                # name (up to 19 versions) before rewriting the base file.
                for i in range(1, 20):
                    testFileName = outputFileName + imageName + "-" + str(i) + ".png"
                    if not os.path.isfile(testFileName):
                        imagesAnalysis[imageName].savefig(testFileName)
                        break
            imagesAnalysis[imageName].savefig(outputFileName + imageName + '.png')
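# File layout sketch (hypothetical prefix): with outputFileName ending in
# "Results-DecisionTree-yes-no-", saveResults writes
#   ...Results-DecisionTree-yes-no-.txt              textual analysis
#   ...Results-DecisionTree-yes-no-full_pred.csv     predictions on the whole dataset
#   ...Results-DecisionTree-yes-no-train_pred.csv    predictions on the training set
#   ...Results-DecisionTree-yes-no-train_labels.csv  ground-truth training labels
# plus one .png per figure in imagesAnalysis.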
if __name__ == '__main__':
    """The goal of this part of the module is to be able to execute a monoview experimentation
    on a node of a cluster independently.
......
import numpy as np
from pyscm.scm import SetCoveringMachineClassifier as scm
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.externals.six import iteritems


def genName(config):
    return "FatSCMLateFusion"


def getBenchmark(benchmark, args=None):
    benchmark["Multiview"]["FatSCMLateFusion"] = ["take_everything"]
    return benchmark
def getArgs(args, benchmark, views, viewsIndices, randomState, directory, resultsMonoview, classificationIndices):
    argumentsList = []
    # Stack the full-dataset predictions of every monoview classifier into an
    # (n_examples, n_views) matrix: these decisions are the features of the fusion.
    monoviewDecisions = np.transpose(np.array([monoviewResult[1][3] for monoviewResult in resultsMonoview]))
    arguments = {"CL_type": "FatSCMLateFusion",
                 "views": ["all"],
                 "NB_VIEW": len(resultsMonoview),
                 "viewsIndices": range(len(resultsMonoview)),
                 "NB_CLASS": len(args.CL_classes),
                 "LABELS_NAMES": args.CL_classes,
                 "FatSCMLateFusionKWARGS": {
                     "monoviewDecisions": monoviewDecisions,
                     "p": args.FSCMLF_p,
                     "max_attributes": args.FSCMLF_max_attributes,
                     "model": args.FSCMLF_model,
                 }
                 }
    argumentsList.append(arguments)
    return argumentsList
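# Shape sketch for monoviewDecisions (hypothetical values): with 3 monoview
# classifiers and 5 examples, transposing the stacked prediction vectors yields
# one row per example and one column per view, i.e. the late-fusion features.
import numpy as np

view_predictions = [np.array([0, 1, 1, 0, 1]),   # view 0
                    np.array([0, 1, 0, 0, 1]),   # view 1
                    np.array([1, 1, 1, 0, 0])]   # view 2
monoviewDecisions = np.transpose(np.array(view_predictions))
# monoviewDecisions.shape == (5, 3): row i gathers the views' votes on example i.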
def genParamsSets(classificationKWARGS, randomState, nIter=1):
    """Used to generate parameter sets for the random hyper-parameter optimization function"""
    paramsSets = []
    for _ in range(nIter):
        max_attributes = randomState.randint(1, 20)
        p = randomState.random_sample()
        model = randomState.choice(["conjunction", "disjunction"])
        paramsSets.append([p, max_attributes, model])
    return paramsSets
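# Usage sketch: draw two candidate parameter sets reproducibly (seed chosen
# arbitrarily, for illustration only).
import numpy as np

paramsSets = genParamsSets({}, np.random.RandomState(42), nIter=2)
# Each entry is [p, max_attributes, model], consumed in that order by
# FatSCMLateFusionClass.setParams below.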
class FatSCMLateFusionClass:

    def __init__(self, randomState, NB_CORES=1, **kwargs):
        # Falsy values (None, 0) fall back to the defaults below.
        if kwargs["p"]:
            self.p = kwargs["p"]
        else:
            self.p = 0.5
        if kwargs["max_attributes"]:
            self.max_attributes = kwargs["max_attributes"]
        else:
            self.max_attributes = 5
        if kwargs["model"]:
            self.model = kwargs["model"]
        else:
            self.model = "conjunction"
        self.monoviewDecisions = kwargs["monoviewDecisions"]
        self.randomState = randomState

    def setParams(self, paramsSet):
        self.p = paramsSet[0]
        self.max_attributes = paramsSet[1]
        self.model = paramsSet[2]

    def fit_hdf5(self, DATASET, labels, trainIndices=None, viewsIndices=None, metric=["f1_score", None]):
        # The monoview decisions, not the raw views, are the input features of the SCM.
        features = self.monoviewDecisions[trainIndices]
        self.SCMClassifier = DecisionStumpSCMNew(p=self.p, max_rules=self.max_attributes, model_type=self.model,
                                                 random_state=self.randomState)
        self.SCMClassifier.fit(features, labels[trainIndices].astype(int))

    def predict_hdf5(self, DATASET, usedIndices=None, viewsIndices=None):
        if usedIndices is None:
            usedIndices = range(DATASET.get("Metadata").attrs["datasetLength"])
        predictedLabels = self.SCMClassifier.predict(self.monoviewDecisions[usedIndices])
        return predictedLabels

    def predict_probas_hdf5(self, DATASET, usedIndices=None):
        pass

    def getConfigString(self, classificationKWARGS):
        return "p : " + str(self.p) + ", max_attributes : " + str(self.max_attributes) + ", model : " + self.model

    def getSpecificAnalysis(self, classificationKWARGS):
        stringAnalysis = ''
        return stringAnalysis
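# Minimal end-to-end sketch with hypothetical data (assumes pyscm is installed):
# the fusion trains an SCM on the stacked monoview decisions and predicts from
# them. DATASET is only read when usedIndices is None, so None suffices here.
import numpy as np

rng = np.random.RandomState(1)
kwargs = {"monoviewDecisions": rng.randint(0, 2, size=(100, 3)),
          "p": 0.5, "max_attributes": 5, "model": "conjunction"}
fusion = FatSCMLateFusionClass(np.random.RandomState(42), **kwargs)
fusion.fit_hdf5(None, labels=rng.randint(0, 2, size=100), trainIndices=np.arange(80))
predictions = fusion.predict_hdf5(None, usedIndices=np.arange(80, 100))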
class DecisionStumpSCMNew(BaseEstimator, ClassifierMixin):
    """Wrapper around the decision-stump SCM that follows the scikit-learn estimator
    conventions, so that sklearn utilities (cross-validation, grid search, and so on)
    can be used with the SCM."""

    def __init__(self, model_type='conjunction', p=0.1, max_rules=10, random_state=42):
        super(DecisionStumpSCMNew, self).__init__()
        self.model_type = model_type
        self.p = p
        self.max_rules = max_rules
        self.random_state = random_state

    def fit(self, X, y):
        self.clf = scm(model_type=self.model_type, max_rules=self.max_rules, p=self.p, random_state=self.random_state)
        self.clf.fit(X=X, y=y)
        return self  # sklearn convention: fit returns the estimator

    def predict(self, X):
        return self.clf.predict(X)

    def set_params(self, **params):
        for key, value in iteritems(params):
            if key == 'p':
                self.p = value
            if key == 'model_type':
                self.model_type = value
            if key == 'max_rules':
                self.max_rules = value
        return self  # sklearn convention: set_params returns the estimator

    def get_stats(self):
        return {"Binary_attributes": self.clf.model_.rules}
from . import FatSCMLateFusionModule, analyzeResults
\ No newline at end of file
from ...Multiview import analyzeResults

# Author-Info
__author__ = "Baptiste Bauvin"
__status__ = "Prototype"  # Production, Development, Prototype


def execute(classifier, trainLabels,
            testLabels, DATASET,
            classificationKWARGS, classificationIndices,
            LABELS_DICTIONARY, views, nbCores, times,
            name, KFolds,
            hyperParamSearch, nIter, metrics,
            viewsIndices, randomState, labels, classifierModule):
    # Delegate to the generic multiview results analysis.
    return analyzeResults.execute(classifier, trainLabels,
                                  testLabels, DATASET,
                                  classificationKWARGS, classificationIndices,
                                  LABELS_DICTIONARY, views, nbCores, times,
                                  name, KFolds,
                                  hyperParamSearch, nIter, metrics,
                                  viewsIndices, randomState, labels, classifierModule)
\ No newline at end of file
@@ -45,7 +45,6 @@ class DecisionStumpSCMNew(BaseEstimator, ClassifierMixin):
def genParamsSets(classificationKWARGS, randomState, nIter=1):
    nbView = classificationKWARGS["nbView"]
    paramsSets = []
    for _ in range(nIter):
        max_attributes = randomState.randint(1, 20)
......
@@ -198,10 +198,23 @@ def parseTheArgs(arguments):
                                    default=[])

    groupMumboNew = parser.add_argument_group('New Mumbo implementation arguments')
    groupMumboNew.add_argument('--MUN_n_estimators', metavar='INT', action='store',
                               help='Determine the number of estimators for mumbo', type=int,
                               default=10)

    groupFatSCMLateFusion = parser.add_argument_group('Fat SCM Late Fusion arguments')
    groupFatSCMLateFusion.add_argument('--FSCMLF_p', metavar='FLOAT', action='store',
                                       help='Determine the p argument of the SCM', type=float,
                                       default=0.5)
    groupFatSCMLateFusion.add_argument('--FSCMLF_max_attributes', metavar='INT', action='store',
                                       help='Determine the maximum number of attributes used by the SCM', type=int,
                                       default=4)
    groupFatSCMLateFusion.add_argument('--FSCMLF_model', metavar='STRING', action='store',
                                       help='Determine the model type of the SCM',
                                       default="conjunction")

    args = parser.parse_args(arguments)
    return args
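# Usage sketch (hypothetical values, assuming no other flag is mandatory): the
# new Fat SCM Late Fusion options parsed through parseTheArgs.
args = parseTheArgs(["--FSCMLF_p", "0.8",
                     "--FSCMLF_max_attributes", "6",
                     "--FSCMLF_model", "disjunction"])
# args.FSCMLF_p == 0.8, args.FSCMLF_max_attributes == 6, args.FSCMLF_model == "disjunction"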
......