Skip to content
Snippets Groups Projects
Commit 1c4eea32 authored by bbauvin's avatar bbauvin
Browse files

Working copy with gridsearch and basic analysis result

parent 57e9e3c2
No related branches found
No related tags found
No related merge requests found
Showing
with 48 additions and 139 deletions
......@@ -64,10 +64,14 @@ groupClass.add_argument('--CL_metrics', metavar='STRING', action='store',
'first one will be used for gridsearch', default='')
groupClass.add_argument('--CL_GS_iter', metavar='INT', action='store',
help='Determine how many Randomized grid search tests to do', type=int, default=30)
# Passing --CL_NoGS stores False; the parsed value is later used as the gridSearch switch,
# so giving the flag turns the randomized grid search off.
groupClass.add_argument('--CL_NoGS', action='store_false',
                        help='Disable the randomized grid search (it is run by default)')
groupRF = parser.add_argument_group('Random Forest arguments')
groupRF.add_argument('--CL_RF_trees', metavar='STRING', action='store', help='GridSearch: Determine the trees',
default='25 75 125 175')
# Colon-separated candidate depths: the value is later split on ":" and cast to int.
groupRF.add_argument('--CL_RF_max_depth', metavar='STRING', action='store',
                     help='GridSearch: Determine the max depth of the trees', default='5:10:15')
groupSVMLinear = parser.add_argument_group('Linear SVM arguments')
groupSVMLinear.add_argument('--CL_SVML_C', metavar='STRING', action='store', help='GridSearch : Penalty parameters used',
......@@ -113,7 +117,7 @@ groupMumbo.add_argument('--MU_config', metavar='STRING', action='store', nargs='
default=['3:1.0', '3:1.0', '3:1.0','3:1.0'])
groupMumbo.add_argument('--MU_iter', metavar='INT', action='store', nargs=3,
help='Max number of iteration, min number of iteration, convergence threshold', type=float,
default=[1000, 300, 0.0005])
default=[100, 3, 0.005])
groupFusion = parser.add_argument_group('Fusion arguments')
groupFusion.add_argument('--FU_types', metavar='STRING', action='store',
......@@ -136,6 +140,11 @@ groupFusion.add_argument('--FU_cl_config', metavar='STRING', action='store', nar
args = parser.parse_args()
nbCores = args.CL_cores
# --CL_NoGS is a store_false flag, so the parsed value is True unless the flag is given.
# getattr keeps the former "fall back to True" behaviour without a bare except that
# would also hide unrelated errors.
gridSearch = getattr(args, "CL_NoGS", True)
directory = os.path.dirname(os.path.abspath(__file__)) + "/Results/"
logFileName = time.strftime("%Y%m%d-%H%M%S") + "-CMultiV-" + args.CL_type + "-" + "_".join(args.views.split(":")) + "-" + args.name + \
"-LOG"
......@@ -209,19 +218,19 @@ if "Monoview" in args.CL_type.strip(":"):
fusionClassifierConfig = "a"
fusionMethodConfig = ["q", "b"]
fusionMethodConfig = [["0.25", "0.25", "0.25", "0.25"], "b"]
mumboClassifierConfig = "a"
mumboclassifierNames = "a"
RandomForestKWARGS = {"0":map(int, args.CL_RF_trees.split())}
SVMLinearKWARGS = {"0":map(int, args.CL_SVML_C.split(":"))}
SVMRBFKWARGS = {"0":map(int, args.CL_SVMR_C.split(":"))}
SVMPolyKWARGS = {"0":map(int, args.CL_SVMP_C.split(":")), '1':map(int, args.CL_SVMP_deg.split(":"))}
DecisionTreeKWARGS = {"0":map(int, args.CL_DT_depth.split(":"))}
SGDKWARGS = {"0": map(float, args.CL_SGD_alpha.split(":")), "1":args.CL_SGD_loss.split(":"),
"2": args.CL_SGD_penalty.split(":")}
KNNKWARGS = {"0": map(float, args.CL_KNN_neigh.split(":"))}
AdaboostKWARGS = {"0": args.CL_Ada_n_est.split(":"), "1": args.CL_Ada_b_est.split(":")}
RandomForestKWARGS = {"0":map(int, args.CL_RF_trees.split())[0], "1":map(int, args.CL_RF_max_depth.split(":"))[0]}
SVMLinearKWARGS = {"0":map(int, args.CL_SVML_C.split(":"))[0]}
SVMRBFKWARGS = {"0":map(int, args.CL_SVMR_C.split(":"))[0]}
SVMPolyKWARGS = {"0":map(int, args.CL_SVMP_C.split(":"))[0], '1':map(int, args.CL_SVMP_deg.split(":"))[0]}
DecisionTreeKWARGS = {"0":map(int, args.CL_DT_depth.split(":"))[0]}
SGDKWARGS = {"2": map(float, args.CL_SGD_alpha.split(":"))[0], "1": args.CL_SGD_penalty.split(":")[0],
"0":args.CL_SGD_loss.split(":")[0]}
KNNKWARGS = {"0": map(float, args.CL_KNN_neigh.split(":"))[0]}
AdaboostKWARGS = {"0": args.CL_Ada_n_est.split(":")[0], "1": args.CL_Ada_b_est.split(":")[0]}
argumentDictionaries = {"Monoview": {}, "Multiview": []}
......@@ -243,35 +252,36 @@ resultsMonoview = []
for viewIndex, viewArguments in enumerate(argumentDictionaries["Monoview"].values()):
resultsMonoview.append( (Parallel(n_jobs=nbCores)(
delayed(ExecMonoview)(DATASET.get("View"+str(viewIndex)), DATASET.get("labels").value, args.name,
args.CL_split, args.CL_nbFolds, 1, args.type, args.pathF, gridSearch=True,
args.CL_split, args.CL_nbFolds, 1, args.type, args.pathF, gridSearch=gridSearch,
metric=metric, nIter=args.CL_GS_iter, **arguments)
for arguments in viewArguments)))
accuracies = [result[1] for result in resultsMonoview[viewIndex]]
classifiersNames = [result[0] for result in resultsMonoview[viewIndex]]
classifiersConfigs = [result[2] for result in resultsMonoview[viewIndex]]
print classifiersConfigs
bestClassifiers.append(classifiersNames[np.argmax(np.array(accuracies))])
bestClassifiersConfigs.append(classifiersConfigs[np.argmax(np.array(accuracies))])
# bestClassifiers = ["DecisionTree", "DecisionTree", "DecisionTree", "DecisionTree"]
# bestClassifiersConfigs = [["1"],["1"],["1"],["1"]]
try:
if benchmark["Multiview"]:
try:
if benchmark["Multiview"]["Mumbo"]:
for combination in itertools.combinations_with_replacement(range(len(benchmark["Multiview"]["Mumbo"])), NB_VIEW):
classifiersNames = [benchmark["Multiview"]["Mumbo"][index] for index in combination]
arguments = {"CL_type": "Mumbo",
"views": args.views.split(":"),
"NB_VIEW": len(args.views.split(":")),
"NB_CLASS": len(args.CL_classes.split(":")),
"LABELS_NAMES": args.CL_classes.split(":"),
"MumboKWARGS": {"classifiersNames": classifiersNames,
"maxIter":int(args.MU_iter[0]), "minIter":int(args.MU_iter[1]),
"threshold":args.MU_iter[2],
"classifiersConfigs": [argument.split(":") for argument in args.MU_config]}}
argumentDictionaries["Multiview"].append(arguments)
except:
pass
# try:
# if benchmark["Multiview"]["Mumbo"]:
# for combination in itertools.combinations_with_replacement(range(len(benchmark["Multiview"]["Mumbo"])), NB_VIEW):
# classifiersNames = [benchmark["Multiview"]["Mumbo"][index] for index in combination]
# arguments = {"CL_type": "Mumbo",
# "views": args.views.split(":"),
# "NB_VIEW": len(args.views.split(":")),
# "NB_CLASS": len(args.CL_classes.split(":")),
# "LABELS_NAMES": args.CL_classes.split(":"),
# "MumboKWARGS": {"classifiersNames": classifiersNames,
# "maxIter":int(args.MU_iter[0]), "minIter":int(args.MU_iter[1]),
# "threshold":args.MU_iter[2],
# "classifiersConfigs": [argument.split(":") for argument in args.MU_config]}}
# argumentDictionaries["Multiview"].append(arguments)
# except:
# pass
try:
if benchmark["Multiview"]["Fusion"]:
try:
......@@ -299,8 +309,8 @@ try:
"NB_CLASS": len(args.CL_classes.split(":")),
"LABELS_NAMES": args.CL_classes.split(":"),
"FusionKWARGS": {"fusionType":"EarlyFusion", "fusionMethod":method,
"classifiersNames": classifier,
"classifiersConfigs": fusionClassifierConfig,
"classifiersNames": [classifier],
"classifiersConfigs": [globals()[classifier+"KWARGS"]],
'fusionMethodConfig': fusionMethodConfig}}
argumentDictionaries["Multiview"].append(arguments)
except:
......@@ -314,7 +324,7 @@ except:
# print len(argumentDictionaries["Multiview"]), len(argumentDictionaries["Monoview"])
resultsMultiview = Parallel(n_jobs=nbCores)(
delayed(ExecMultiview)(DATASET, args.name, args.CL_split, args.CL_nbFolds, 1, args.type, args.pathF,
LABELS_DICTIONARY, #gridSearch=True,
LABELS_DICTIONARY, gridSearch=gridSearch,
metrics=metrics, **arguments)
for arguments in argumentDictionaries["Multiview"])
......
......@@ -43,6 +43,7 @@ def ExecMonoview(X, Y, name, learningRate, nbFolds, nbCores, databaseType, path,
CL_type = kwargs["CL_type"]
classifierKWARGS = kwargs[CL_type+"KWARGS"]
X = X.value
clKWARGS = kwargs[kwargs["CL_type"]+"KWARGS"]
# Determine the Database to extract features
logging.debug("### Main Programm for Classification MonoView")
......@@ -64,8 +65,10 @@ def ExecMonoview(X, Y, name, learningRate, nbFolds, nbCores, databaseType, path,
classifierModule = getattr(MonoviewClassifiers, CL_type)
classifierGridSearch = getattr(classifierModule, "gridSearch")
if gridSearch:
cl_desc = classifierGridSearch(X_train, y_train, nbFolds=nbFolds, nbCores=nbCores, metric=metric, nIter=nIter)
cl_res = classifierModule.fit(X_train, y_train, NB_CORES=nbCores, **dict((str(index), desc) for index, desc in enumerate(cl_desc)))
clKWARGS = dict((str(index), desc) for index, desc in enumerate(cl_desc))
cl_res = classifierModule.fit(X_train, y_train, NB_CORES=nbCores, **clKWARGS)
t_end = time.time() - t_start
# Add result to Results DF
......@@ -96,7 +99,7 @@ def ExecMonoview(X, Y, name, learningRate, nbFolds, nbCores, databaseType, path,
#Accuracy classification score
accuracy_score = ExportResults.accuracy_score(y_test, y_test_pred)
logging.info("Accuracy :" +str(accuracy_score))
cl_desc = [value for key, value in sorted(clKWARGS.iteritems())]
return [CL_type, accuracy_score, cl_desc, feat]
# # Classification Report with Precision, Recall, F1 , Support
# logging.debug("Info:\t Classification report:")
......
......@@ -187,7 +187,7 @@ def showResults(directory, filename, db, feat, score):
# Function to calculate the accuracy score for test data
def accuracy_score(y_test, y_test_pred):
return str(metrics.accuracy_score(y_test, y_test_pred))
return metrics.accuracy_score(y_test, y_test_pred)
# Function to calculate a classification report and store it
......
2016-08-19 20:16:59,329 DEBUG: ### Main Programm for Classification MonoView
2016-08-19 20:16:59,330 DEBUG: ### Classification - Database:MultiOmicDataset Feature:RNASeq train_size:0.5, CrossValidation k-folds:5, cores:4
2016-08-19 20:17:45,654 DEBUG: ### Main Programm for Classification MonoView
2016-08-19 20:17:45,655 DEBUG: ### Classification - Database:MultiOmicDataset Feature:RNASeq train_size:0.5, CrossValidation k-folds:5, cores:4
2016-08-19 20:18:13,897 DEBUG: ### Main Programm for Classification MonoView
2016-08-19 20:18:13,897 DEBUG: ### Classification - Database:MultiOmicDataset Feature:RNASeq train_size:0.5, CrossValidation k-folds:5, cores:4
2016-08-19 20:18:13,897 DEBUG: Start: Read hdf5 Files
2016-08-19 20:19:25,358 DEBUG: ### Main Programm for Classification MonoView
2016-08-19 20:19:25,358 DEBUG: ### Classification - Database:MultiOmicDataset Feature:RNASeq train_size:0.5, CrossValidation k-folds:5, cores:4
2016-08-19 20:19:25,359 DEBUG: Start: Read hdf5 Files
2016-08-19 20:19:51,424 DEBUG: ### Main Programm for Classification MonoView
2016-08-19 20:19:51,424 DEBUG: ### Classification - Database:MultiOmicDataset Feature:RNASeq train_size:0.5, CrossValidation k-folds:5, cores:4
2016-08-19 20:19:51,424 DEBUG: Start: Read hdf5 Files
2016-08-19 20:20:30,839 DEBUG: ### Main Programm for Classification MonoView
2016-08-19 20:20:30,840 DEBUG: ### Classification - Database:MultiOmicDataset Feature:RNASeq train_size:0.5, CrossValidation k-folds:5, cores:4
2016-08-19 20:20:30,840 DEBUG: Start: Read hdf5 Files
2016-08-19 20:20:32,791 DEBUG: Info: Shape of Feature:(347, 73599), Length of classLabels vector:(347,)
2016-08-19 20:20:32,791 DEBUG: Done: Read CSV Files
2016-08-19 20:20:32,791 DEBUG: Start: Determine Train/Test split
2016-08-19 20:20:32,882 DEBUG: Info: Shape X_train:(173, 73599), Length of y_train:173
2016-08-19 20:20:32,883 DEBUG: Info: Shape X_test:(174, 73599), Length of y_test:174
2016-08-19 20:20:32,883 DEBUG: Done: Determine Train/Test split
2016-08-19 20:20:32,883 DEBUG: Start: Classification
2016-08-19 20:20:50,330 DEBUG: Info: Time for Classification: 19.4069910049[s]
2016-08-19 20:20:50,330 DEBUG: Done: Classification
2016-08-19 20:20:50,330 DEBUG: Start: Exporting to CSV
2016-08-19 20:20:50,334 DEBUG: Done: Exporting to CSV
2016-08-19 20:23:50,436 DEBUG: ### Main Programm for Classification MonoView
2016-08-19 20:23:50,437 DEBUG: ### Classification - Database:MultiOmicDataset Feature:RNASeq train_size:0.5, CrossValidation k-folds:5, cores:4
2016-08-19 20:23:50,437 DEBUG: Start: Read hdf5 Files
2016-08-19 20:23:50,527 DEBUG: Info: Shape of Feature:(347, 73599), Length of classLabels vector:(347,)
2016-08-19 20:23:50,528 DEBUG: Done: Read CSV Files
2016-08-19 20:23:50,528 DEBUG: Start: Determine Train/Test split
2016-08-19 20:23:50,608 DEBUG: Info: Shape X_train:(173, 73599), Length of y_train:173
2016-08-19 20:23:50,608 DEBUG: Info: Shape X_test:(174, 73599), Length of y_test:174
2016-08-19 20:23:50,608 DEBUG: Done: Determine Train/Test split
2016-08-19 20:23:50,608 DEBUG: Start: Classification
;a_class_time;b_cl_desc;c_cl_res;d_cl_score
0;2.79198694229;Classif_RF-CV_2-Trees_175;"GridSearchCV(cv=2, error_score='raise',
estimator=Pipeline(steps=[('classifier', RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
max_depth=None, max_features='auto', max_leaf_nodes=None,
min_samples_leaf=1, min_samples_split=2,
min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
oob_score=False, random_state=None, verbose=0,
warm_start=False))]),
fit_params={}, iid=True, n_jobs=1,
param_grid={'classifier__n_estimators': [25, 75, 125, 175]},
pre_dispatch='2*n_jobs', refit=True, scoring='accuracy', verbose=0)";0.788461538462
;a_class_time;b_cl_desc;c_cl_res;d_cl_score
0;0.0805540084839;Classif_Lasso-CV_2-;"GridSearchCV(cv=2, error_score='raise',
estimator=Pipeline(steps=[('classifier', SGDClassifier(alpha=0.0001, average=False, class_weight=None, epsilon=0.1,
eta0=0.0, fit_intercept=True, l1_ratio=0.15,
learning_rate='optimal', loss='hinge', n_iter=5, n_jobs=1,
penalty='l2', power_t=0.5, random_state=None, shuffle=True,
verbose=0, warm_start=False))]),
fit_params={}, iid=True, n_jobs=1,
param_grid={'classifier__loss': ['log'], 'classifier__alpha': [0.1, 0.2, 0.5, 0.9], 'classifier__penalty': ['l2']},
pre_dispatch='2*n_jobs', refit=True, scoring='accuracy', verbose=0)";0.673076923077
;a_class_time;b_cl_desc;c_cl_res;d_cl_score
0;0.0442838668823;Classif_DT-CV_2-;"GridSearchCV(cv=2, error_score='raise',
estimator=Pipeline(steps=[('classifier', DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
max_features=None, max_leaf_nodes=None, min_samples_leaf=1,
min_samples_split=2, min_weight_fraction_leaf=0.0,
presort=False, random_state=None, splitter='best'))]),
fit_params={}, iid=True, n_jobs=1,
param_grid={'classifier__max_depth': [1, 3, 5, 7]},
pre_dispatch='2*n_jobs', refit=True, scoring='accuracy', verbose=0)";0.817307692308
;a_class_time;b_cl_desc;c_cl_res;d_cl_score
0;2.47204518318;Classif_RF-CV_2-Trees_175;"GridSearchCV(cv=2, error_score='raise',
estimator=Pipeline(steps=[('classifier', RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
max_depth=None, max_features='auto', max_leaf_nodes=None,
min_samples_leaf=1, min_samples_split=2,
min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
oob_score=False, random_state=None, verbose=0,
warm_start=False))]),
fit_params={}, iid=True, n_jobs=1,
param_grid={'classifier__n_estimators': [25, 75, 125, 175]},
pre_dispatch='2*n_jobs', refit=True, scoring='accuracy', verbose=0)";0.782051282051
;a_class_time;b_cl_desc;c_cl_res;d_cl_score
0;0.0795521736145;Classif_Lasso-CV_2-;"GridSearchCV(cv=2, error_score='raise',
estimator=Pipeline(steps=[('classifier', SGDClassifier(alpha=0.0001, average=False, class_weight=None, epsilon=0.1,
eta0=0.0, fit_intercept=True, l1_ratio=0.15,
learning_rate='optimal', loss='hinge', n_iter=5, n_jobs=1,
penalty='l2', power_t=0.5, random_state=None, shuffle=True,
verbose=0, warm_start=False))]),
fit_params={}, iid=True, n_jobs=1,
param_grid={'classifier__loss': ['log'], 'classifier__alpha': [0.1, 0.2, 0.5, 0.9], 'classifier__penalty': ['l2']},
pre_dispatch='2*n_jobs', refit=True, scoring='accuracy', verbose=0)";0.74358974359
;Non;Oui;All
Non;0.5;0.0;0.428571428571
Oui;0.05;0.2;0.0714285714286
All;0.55;0.2;0.5
;Non;Oui;All
Non;0.480769230769;0.0555555555556;0.371428571429
Oui;0.0384615384615;0.388888888889;0.128571428571
All;0.519230769231;0.444444444444;0.5
;Non;Oui;All
Non;0.5;0.0;0.385714285714
Oui;0.0925925925926;0.1875;0.114285714286
All;0.592592592593;0.1875;0.5
;Non;Oui;All
Non;0.28;0.55;0.357142857143
Oui;0.16;0.1;0.142857142857
All;0.44;0.65;0.5
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment