diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..2f047f644136b6340454b39188f30174d6820f2e Binary files /dev/null and b/.DS_Store differ diff --git a/Class_AddExp_online.py b/Class_AddExp_online.py new file mode 100644 index 0000000000000000000000000000000000000000..4935efd49c9df7947f4cc224fbcc23640a91de86 --- /dev/null +++ b/Class_AddExp_online.py @@ -0,0 +1,213 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Fri Mar 27 22:17:58 2020 + +@author: taopeng +""" +import numpy as np +from sklearn.linear_model import SGDRegressor +from sklearn.linear_model import PassiveAggressiveRegressor +import random +from sklearn.linear_model import LinearRegression +from sklearn import svm +from sklearn.linear_model import Ridge +from sklearn.linear_model import Lasso +from sklearn.preprocessing import MinMaxScaler + + +class Scaling(): + def fit(self, U): + self.maxU = np.max(U) + self.minU = np.min(U) + + def trassform(self , X): + return [(c - self.minU)/(self.maxU - self.minU) for c in X ] + + def inverse_transform(self , X): + return [ c*(self.maxU - self.minU)+self.minU for c in X ] + + + +class AddExp(): + def __init__(self, + X_train, + Y_train, + beta = 0.5, + gamma = 0.1, + tau = 0.5, + maxNumExpert = 25, + flagBoosting = 1, + flatPuringWorstFirst = 1): + + self.beta = beta + self.gamma = gamma + self.tau = tau + self.maxNumExpert = maxNumExpert + self.flagBoosting = flagBoosting + + self.scalingY = Scaling() + self.scalingY.fit(Y_train) + Y_train = self.scalingY.trassform(Y_train) + + + self.index_New_expert = 0 + + self.dic_expert = {} + while(len(self.dic_expert) < self.maxNumExpert): + # dic_expert[index_New_expert]= [SGDRegressor(eta0=0.000001),tau,'online'] + self.dic_expert[self.index_New_expert]= [PassiveAggressiveRegressor(),self.tau,'online'] + self.dic_expert[self.index_New_expert][0].partial_fit(X_train , Y_train) + self.index_New_expert +=1 + + + + def predict(self,X_test): + self.prediction_voted = [0]*len(X_test) + sum_weight = 0 + prediction_allExpert_step = {}# for store the prediction of each expert + for key,value in self.dic_expert.items(): + # call one expert, getting the predicttion of this expert + prediction_allExpert_step[key] = value[0].predict(X_test) + # getting weighted prediction + prediction_oneExpert_weighted = [c*value[1] for c in prediction_allExpert_step[key]] + #accumate the weighted prediction one by one + self.prediction_voted = list(map(lambda a , b : (a+b), self.prediction_voted , prediction_oneExpert_weighted)) + #accumate the weight one by one + sum_weight += value[1] + #getting the weighted vote + self.prediction_voted = list(map(lambda a : a/sum_weight, self.prediction_voted)) + return self.scalingY.inverse_transform(self.prediction_voted) + + def get_RMSE(self,X_test, Y_test): + MSE = list(map(lambda a,b : (a-b)**2, self.predict(X_test) , Y_test)) + MSE = np.average(MSE)**0.5 + return MSE + + def get_listResidualAbu(self,X_test, Y_test): + return list(map(lambda a,b : np.absolute(a-b), self.predict(X_test) , Y_test)) + + def get_listResidual(self,X_test, Y_test): + return list(map(lambda a,b : a-b, self.predict(X_test) , Y_test)) + + def partial_fit(self,X_test, Y_test): + Y_test = self.scalingY.trassform(Y_test) + loss_ensemble = np.average(list(map(lambda a,b : np.absolute(a-b), self.scalingY.trassform(self.predict(X_test)),Y_test))) + + #calculate the loss of each expert in this batch + # and update its weight + # by the way, getting the name of worest one + # by the way , 
getting the name of oldest one + worestLoss = 0 + oldestKey = 100000 + for key,value in self.dic_expert.items(): + loss_oneExpert = np.average(list(map(lambda a,b : np.absolute(a-b),self.scalingY.trassform(value[0].predict(X_test)),Y_test))) + self.dic_expert[key][1] = self.dic_expert[key][1]*self.beta**loss_oneExpert + if loss_oneExpert > worestLoss: + worestLoss = loss_oneExpert + keyOfWorest = key + if oldestKey > key: + oldestKey = key + # updating all expert + if value[2] == 'online': + value[0].partial_fit(X_test,Y_test) + + #if we needs new expert + if loss_ensemble > self.gamma: + # if loss_ensemble > self.gamma or : + # randomly select a model form linearRregression or SVR + rand = random.randint(0,1) + if rand == 0: + self.dic_expert[self.index_New_expert]= [SGDRegressor(eta0=0.000001),self.tau,'online'] + elif rand == 1: + self.dic_expert[self.index_New_expert]= [PassiveAggressiveRegressor(),self.tau,'online'] + elif rand == 2: + self.dic_expert[self.index_New_expert]= [LinearRegression(),self.tau,'static'] + elif rand == 3: + self.dic_expert[self.index_New_expert]= [svm.SVR(gamma='auto'),self.tau,'static'] + elif rand == 4: + self.dic_expert[self.index_New_expert]= [Ridge(alpha=.5),self.tau,'static'] + elif rand == 5: + self.dic_expert[self.index_New_expert]= [Lasso(alpha=0.1, copy_X=True, fit_intercept=True, max_iter=1000,normalize=False, positive=False, precompute=False, random_state=None,selection='cyclic', tol=0.0001, warm_start=False) + ,self.tau,'static'] + + if self.flagBoosting == 1: + boostX = [] + boostY = [] + while(len(boostY)<len(X_test)): + index_instance = random.randint(0,len(X_test)-1) + # getting loss of normalised instance + loss_instance = np.absolute( self.prediction_voted[index_instance] - Y_test[index_instance]) + #determine the number of repeat of instances as his real loss + numBoosting = np.random.poisson(loss_instance/loss_ensemble) + for k in range(numBoosting): + boostX.append(X_test[index_instance]) + boostY.append(Y_test[index_instance]) + + if self.dic_expert[self.index_New_expert][2] == 'online': + self.dic_expert[self.index_New_expert][0].partial_fit(boostX , boostY) + else: + self.dic_expert[self.index_New_expert][0].fit(boostX , boostY) + + else: + if self.dic_expert[self.index_New_expert][2] == 'static': + self.dic_expert[self.index_New_expert][0].partial_fit(X_test , Y_test) + else: + self.dic_expert[self.index_New_expert][0].fit(X_test , Y_test) + self.index_New_expert +=1 + + # purning some expert + if len(self.dic_expert) > self.maxNumExpert: + self.dic_expert.pop(keyOfWorest) + + # print("# expert" , len(self.dic_expert)) + # for key,value in self.dic_expert.items(): + # print('weight ' , value[1], end = '') + + +# X,Y =np.load("X_Y_RH2.npy") +# X,Y =np.load("HYPER10_Incremental.npy") +# X,Y =np.load("HYPER10_Gradual.npy") +# X,Y =np.load("HYPER10_Sudden.npy") +# X,Y =np.load("HYPER10_Reccouring.npy") + +# X,Y =np.load("Fried_Incremental.npy") +# X,Y =np.load("Fried_Gradual.npy") +# X,Y =np.load("Fried_Sudden.npy") +# X,Y =np.load("Fried_Reccouring.npy") + + +# tauxOfIni = 0.01 +# step = 100 +# X = list(X) +# Y= list(Y) + +# STD_X = MinMaxScaler().fit(X) +# X = list(STD_X.transform(X)) + +# numOfini = int(tauxOfIni*len(X)) +# X_train = X[:numOfini] +# X_test = X[numOfini:] +# Y_train = Y[:numOfini] +# Y_test = Y[numOfini:] + + +# myAddExp = AddExp(X_train, Y_train) +# residual_carre = [] +# i = 0 +# while(i< len(Y_test)-step): +# residual_carre.append(myAddExp.get_RMSE(X_test[i:i+step],Y_test[i:i+step])) +# # 
print(myAddExp.get_RMSE(X_test[i:i+step],Y_test[i:i+step])) +# # test = myAddExp.predit(X_test[i:i+step]) +# myAddExp.partial_fit(X_test[i:i+step],Y_test[i:i+step]) +# i += step + +# print("the mean of RMSE by AddExp is" , np.average(residual_carre)) +# print("the Standard Deviation of RMSE is",np.std(residual_carre)) + +# import matplotlib.pyplot as plt +# plt.plot(range(len(residual_carre)), residual_carre, linestyle = ':', color = 'red', label = " B-NNRW") +# plt.legend(loc='best') +# plt.ylabel("RMSE") +# plt.xlabel('time') +# np.save('RMSE_AddExp_onlineExpert.npy',residual_carre) \ No newline at end of file diff --git a/Class_AddExp_online_RNN.py b/Class_AddExp_online_RNN.py new file mode 100755 index 0000000000000000000000000000000000000000..e62495a24bc2a86537857f646186bdb2875a431d --- /dev/null +++ b/Class_AddExp_online_RNN.py @@ -0,0 +1,312 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Fri Mar 27 22:17:58 2020 + +@author: taopeng +""" +import numpy as np +from sklearn.linear_model import SGDRegressor +from sklearn.linear_model import PassiveAggressiveRegressor +import random +from sklearn.linear_model import LinearRegression +from sklearn.neural_network import MLPRegressor + +from sklearn import svm +from sklearn.linear_model import Ridge +from sklearn.linear_model import Lasso +from sklearn import tree + +from sklearn.preprocessing import MinMaxScaler + +# from keras.models import Sequential +# from keras.layers import Dense +# from keras.layers import GRU +# from keras.layers import LSTM + +''' +Because the Apple M1 chip cannot support RNN, + this part of the sub-model is commented out + +''' + +class Scaling(): + def fit(self, U): + self.maxU = np.max(U) + self.minU = np.min(U) + + def trassform(self , X): + return [(c - self.minU)/(self.maxU - self.minU) for c in X ] + + def inverse_transform(self , X): + return [ c*(self.maxU - self.minU)+self.minU for c in X ] + + + +class AddExp(): + def __init__(self, + X_train, + Y_train, + beta = 0.5, + gamma = 0.01, + tau = 0.5, + maxNumExpert = 25, + flagBoosting = 1, + flatPuringWorstFirst = 1): + + self.beta = beta + self.gamma = gamma + self.tau = tau + self.maxNumExpert = maxNumExpert + self.flagBoosting = flagBoosting + + self.scalingY = Scaling() + self.scalingY.fit(Y_train) + Y_train = self.scalingY.trassform(Y_train) + + + self.index_New_expert = 0 + + self.dic_expert = {} + while(len(self.dic_expert) < self.maxNumExpert-3): + # dic_expert[index_New_expert]= [SGDRegressor(eta0=0.000001),tau,'online'] + self.dic_expert[self.index_New_expert]= [PassiveAggressiveRegressor(),self.tau,'online'] + self.dic_expert[self.index_New_expert][0].partial_fit(X_train , Y_train) + self.index_New_expert +=1 + self.dic_expert[self.index_New_expert]= [MLPRegressor(),self.tau,'online'] + self.dic_expert[self.index_New_expert][0].partial_fit(X_train , Y_train) + self.index_New_expert +=1 + + +# X_train_for_RNN = np.array(X_train) +# X_train_for_RNN = np.reshape(X_train_for_RNN, (X_train_for_RNN.shape[0], 1, X_train_for_RNN.shape[1])) +# Y_train_for_RNN = np.array(Y_train) + # self.dic_expert[self.index_New_expert]= [Sequential(),self.tau,'RNN'] +# self.dic_expert[self.index_New_expert][0].add(GRU(8, input_shape=(1,X_train_for_RNN.shape[2]))) +# self.dic_expert[self.index_New_expert][0].add(Dense(1)) +# self.dic_expert[self.index_New_expert][0].compile(loss='mean_squared_error', optimizer='adam') +# self.dic_expert[self.index_New_expert][0].fit(X_train_for_RNN, +# Y_train_for_RNN, +# epochs=5, +# batch_size=100, +# verbose=2) 
+# self.index_New_expert +=1 + # self.dic_expert[self.index_New_expert]= [Sequential(),self.tau,'RNN'] +# self.dic_expert[self.index_New_expert][0].add(LSTM(8, input_shape=(1,X_train_for_RNN.shape[2]))) +# self.dic_expert[self.index_New_expert][0].add(Dense(1)) +# self.dic_expert[self.index_New_expert][0].compile(loss='mean_squared_error', optimizer='adam') +# self.dic_expert[self.index_New_expert][0].fit(X_train_for_RNN, +# Y_train_for_RNN, +# epochs=5, +# batch_size=100, +# verbose=2) +# self.index_New_expert +=1 + + + + + def predict(self,X_test): + self.prediction_voted = [0]*len(X_test) + sum_weight = 0 + prediction_allExpert_step = {}# for store the prediction of each expert + for key,value in self.dic_expert.items(): + # call one expert, getting the predicttion of this expert + if value[2] == 'RNN': + # print(X_test) + X_test_for_RNN = np.array(X_test) + X_test_for_RNN = np.reshape(X_test_for_RNN, (X_test_for_RNN.shape[0], 1, X_test_for_RNN.shape[1])) + prediction_allExpert_step[key] = value[0].predict(X_test_for_RNN) + + else: + prediction_allExpert_step[key] = value[0].predict(X_test) + # getting weighted prediction + prediction_oneExpert_weighted = [c*value[1] for c in prediction_allExpert_step[key]] + #accumate the weighted prediction one by one + self.prediction_voted = list(map(lambda a , b : (a+b), self.prediction_voted , prediction_oneExpert_weighted)) + #accumate the weight one by one + sum_weight += value[1] + #getting the weighted vote + self.prediction_voted = list(map(lambda a : a/sum_weight, self.prediction_voted)) + return self.scalingY.inverse_transform(self.prediction_voted) + + def get_RMSE(self,X_test, Y_test): + MSE = list(map(lambda a,b : (a-b)**2, self.predict(X_test) , Y_test)) + MSE = np.average(MSE)**0.5 + return MSE + + def get_listResidualAbu(self,X_test, Y_test): + return list(map(lambda a,b : np.absolute(a-b), self.predict(X_test) , Y_test)) + + def get_listResidual(self,X_test, Y_test): + return list(map(lambda a,b : a-b, self.predict(X_test) , Y_test)) + + def partial_fit(self,X_test, Y_test): + Y_test = self.scalingY.trassform(Y_test) + loss_ensemble = np.average(list(map(lambda a,b : np.absolute(a-b), self.scalingY.trassform(self.predict(X_test)),Y_test))) + + #calculate the loss of each expert in this batch + # and update its weight + # by the way, getting the name of worest one + # by the way , getting the name of oldest one + worestLoss = 0 + oldestKey = 10000000 + for key,value in self.dic_expert.items(): + # 计算每个expert的损失,这里要改动 + #loss_oneExpert = np.average(list(map(lambda a,b : np.absolute(a-b),self.scalingY.trassform(value[0].predict(X_test)),Y_test))) + if value[2] == 'RNN': + # print(X_test) + X_test_for_RNN = np.array(X_test) + X_test_for_RNN = np.reshape(X_test_for_RNN, (X_test_for_RNN.shape[0], 1, X_test_for_RNN.shape[1])) + prediction_oneExpert = value[0].predict(X_test_for_RNN) + + else: + prediction_oneExpert = value[0].predict(X_test) + loss_oneExpert = np.average(list(map(lambda a,b : np.absolute(a-b),prediction_oneExpert,Y_test))) + + self.dic_expert[key][1] = self.dic_expert[key][1]*self.beta**loss_oneExpert + if loss_oneExpert > worestLoss: + worestLoss = loss_oneExpert + keyOfWorest = key + if oldestKey > key: + oldestKey = key + # updating all expert + if value[2] == 'online': + value[0].partial_fit(X_test,Y_test) + elif value[2] == 'RNN': + Y_test_for_RNN = np.array(Y_test) + value[0].fit(X_test_for_RNN,Y_test_for_RNN, epochs=8,batch_size=100, verbose=2) + + #if we needs new expert + if loss_ensemble > self.gamma: + # if 
loss_ensemble > self.gamma or : + if self.flagBoosting == 1: + boostX = [] + boostY = [] + while(len(boostY)<len(X_test)): + index_instance = random.randint(0,len(X_test)-1) + # getting loss of normalised instance + loss_instance = np.absolute( self.prediction_voted[index_instance] - Y_test[index_instance]) + #determine the number of repeat of instances as his real loss + numBoosting = np.random.poisson(loss_instance/loss_ensemble) + # print('numBoosting : ',numBoosting) + # print(numBoosting.s) + for k in range(0,int(numBoosting)): + boostX.append(X_test[index_instance]) + boostY.append(Y_test[index_instance]) + + else: + boostX = X_test[:] + boostY = Y_test[:] + + + + # randomly select a model + # rand = random.randint(0,8) + rand = random.randint(0,6) + + if rand == 0: + self.dic_expert[self.index_New_expert]= [SGDRegressor(eta0=0.000001),self.tau,'online'] + self.dic_expert[self.index_New_expert][0].partial_fit(boostX , boostY) + elif rand == 1: + self.dic_expert[self.index_New_expert]= [PassiveAggressiveRegressor(),self.tau,'online'] + self.dic_expert[self.index_New_expert][0].partial_fit(boostX , boostY) + elif rand == 2: + self.dic_expert[self.index_New_expert]= [LinearRegression(),self.tau,'static'] + self.dic_expert[self.index_New_expert][0].fit(boostX , boostY) + elif rand == 3: + self.dic_expert[self.index_New_expert]= [svm.SVR(gamma='auto'),self.tau,'static'] + self.dic_expert[self.index_New_expert][0].fit(boostX , boostY) + elif rand == 4: + self.dic_expert[self.index_New_expert]= [Ridge(alpha=.5),self.tau,'static'] + self.dic_expert[self.index_New_expert][0].fit(boostX , boostY) + elif rand == 5: + self.dic_expert[self.index_New_expert]= [Lasso(alpha=0.1, copy_X=True, fit_intercept=True, max_iter=1000,normalize=False, positive=False, precompute=False, random_state=None,selection='cyclic', tol=0.0001, warm_start=False) + ,self.tau,'static'] + self.dic_expert[self.index_New_expert][0].fit(boostX , boostY).fit(boostX , boostY) + elif rand == 6: + self.dic_expert[self.index_New_expert]= [tree.DecisionTreeRegressor(),self.tau,'static'] + self.dic_expert[self.index_New_expert][0].fit(boostX , boostY) + # elif rand == 7: + # X_train_for_RNN = np.array(boostX) + # X_train_for_RNN = np.reshape(X_train_for_RNN, (X_train_for_RNN.shape[0], 1, X_train_for_RNN.shape[1])) + # Y_train_for_RNN = np.array(boostY) + # self.dic_expert[self.index_New_expert]= [Sequential(),self.tau,'RNN'] + # self.dic_expert[self.index_New_expert][0].add(GRU(8, input_shape=(1,X_train_for_RNN.shape[2]))) + # self.dic_expert[self.index_New_expert][0].add(Dense(1)) + # self.dic_expert[self.index_New_expert][0].compile(loss='mean_squared_error', optimizer='adam') + # self.dic_expert[self.index_New_expert][0].fit(X_train_for_RNN, + # Y_train_for_RNN, + # epochs=5, + # batch_size=100, + # verbose=2) + + # elif rand == 8: + # X_train_for_RNN = np.array(boostX) + # X_train_for_RNN = np.reshape(X_train_for_RNN, (X_train_for_RNN.shape[0], 1, X_train_for_RNN.shape[1])) + # Y_train_for_RNN = np.array(boostY) + # self.dic_expert[self.index_New_expert]= [Sequential(),self.tau,'RNN'] + # self.dic_expert[self.index_New_expert][0].add(LSTM(5, input_shape=(1,X_train_for_RNN.shape[2]))) + # self.dic_expert[self.index_New_expert][0].add(Dense(1)) + # self.dic_expert[self.index_New_expert][0].compile(loss='mean_squared_error', optimizer='adam') + # self.dic_expert[self.index_New_expert][0].fit(X_train_for_RNN, + # Y_train_for_RNN, + # epochs=8, + # batch_size=100, + # verbose=2) + self.index_New_expert +=1 + + # purning some expert + 
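+        # Pruning policy as implemented below: once a new expert has been added,
+        # the pool can exceed maxNumExpert; in that case the expert with the largest
+        # mean absolute loss on the current batch (keyOfWorest) is removed.
+        # oldestKey is computed above but is not used here, and the
+        # flatPuringWorstFirst constructor flag is currently not consulted.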
if len(self.dic_expert) > self.maxNumExpert: + self.dic_expert.pop(keyOfWorest) + + # print("# expert" , len(self.dic_expert)) + # for key,value in self.dic_expert.items(): + # print('weight ' , value[1], end = '') + + +# X,Y =np.load("X_Y_RH2.npy") +# X,Y =np.load("HYPER10_Incremental.npy") +# X,Y =np.load("HYPER10_Gradual.npy") +# X,Y =np.load("HYPER10_Sudden.npy") +# X,Y =np.load("HYPER10_Reccouring.npy") + +# X,Y =np.load("Fried_Incremental.npy") +# X,Y =np.load("Fried_Gradual.npy") +# X,Y =np.load("Fried_Sudden.npy") +# X,Y =np.load("Fried_Reccouring.npy") + + +# tauxOfIni = 0.01 +# step = 100 +# X = list(X) +# Y= list(Y) + +# STD_X = MinMaxScaler().fit(X) +# X = list(STD_X.transform(X)) + +# numOfini = int(tauxOfIni*len(X)) +# X_train = X[:numOfini] +# X_test = X[numOfini:] +# Y_train = Y[:numOfini] +# Y_test = Y[numOfini:] + + +# myAddExp = AddExp(X_train, Y_train) +# residual_carre = [] +# i = 0 +# while(i< len(Y_test)-step): +# residual_carre.append(myAddExp.get_RMSE(X_test[i:i+step],Y_test[i:i+step])) +# # print(myAddExp.get_RMSE(X_test[i:i+step],Y_test[i:i+step])) +# # test = myAddExp.predit(X_test[i:i+step]) +# myAddExp.partial_fit(X_test[i:i+step],Y_test[i:i+step]) +# i += step + +# print("the mean of RMSE by AddExp is" , np.average(residual_carre)) +# print("the Standard Deviation of RMSE is",np.std(residual_carre)) + +# import matplotlib.pyplot as plt +# plt.plot(range(len(residual_carre)), residual_carre, linestyle = ':', color = 'red', label = " B-NNRW") +# plt.legend(loc='best') +# plt.ylabel("RMSE") +# plt.xlabel('time') +# np.save('RMSE_AddExp_onlineExpert.npy',residual_carre) \ No newline at end of file diff --git a/Class_B_NNRW.py b/Class_B_NNRW.py new file mode 100644 index 0000000000000000000000000000000000000000..2f3fbb5bc44a2115b9768534523b922d2da9067a --- /dev/null +++ b/Class_B_NNRW.py @@ -0,0 +1,197 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Fri Mar 27 15:33:20 2020 + +@author: taopeng +""" + + +import numpy as np +import random +from NNRW import SingeHiddenLayer +from sklearn.preprocessing import MinMaxScaler +from sklearn.model_selection import train_test_split + + +class B_NNRW(): + def __init__(self, + X_train, + Y_train, + tau = 1, + P = 0.5, + r = 0.1, + num_neurs = 10, + maxNumExpert = 30, + flagBoosting = 1): + + + self.tau = tau + self.P = P + self.r = r + self.num_neurs = num_neurs + self.maxNumExpert = maxNumExpert + self.flagBoosting = flagBoosting + + + #Initilization of first NNRW(s), + self.dic_expert = {} + self.index_New_expert = 0 + while(self.index_New_expert < self.maxNumExpert): + self.dic_expert[self.index_New_expert]= [SingeHiddenLayer(X_train,Y_train,self.num_neurs),self.tau,'NNRW'] + self.dic_expert[self.index_New_expert][0].train(X_train,Y_train) + self.index_New_expert +=1 + + def predict(self,X_test): + prediction_voted = [0]*len(X_test) + sum_weight = 0 + prediction_allExpert_step = {}# for store the prediction of each expert + for key,value in self.dic_expert.items(): + # call one expert, getting the predicttion of this expert + prediction_allExpert_step[key] = value[0].predict(X_test) + # print(prediction_allExpert_step[key]) + # getting weighted prediction + prediction_oneExpert_weighted = [c*value[1] for c in prediction_allExpert_step[key]] + #accumate the weighted prediction one by one + prediction_voted = list(map(lambda a , b : (a+b), prediction_voted , prediction_oneExpert_weighted)) + #accumate the weight one by one + sum_weight += value[1] + # print(sum_weight) + #getting the weighted vote + 
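+        # (the accumulated weighted predictions are divided by the summed weights,
+        #  so the ensemble output is a weighted average of the individual NNRW
+        #  predictions; experts whose weight was set to 0 in partial_fit contribute
+        #  nothing to this average)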
prediction_voted = list(map(lambda a : a/sum_weight, prediction_voted)) + #calculate the loss of ensemble in this batch + return prediction_voted + + def get_RMSE(self,X_test, Y_test): + MSE = list(map(lambda a,b : (a-b)**2, self.predict(X_test) , Y_test)) + MSE = np.average(MSE)**0.5 + return MSE + + def get_listResidualAbu(self,X_test, Y_test): + return list(map(lambda a,b : np.absolute(a-b), self.predict(X_test) , Y_test)) + + def get_listResidual(self,X_test, Y_test): + return list(map(lambda a,b : a-b, self.predict(X_test) , Y_test)) + + def partial_fit(self,X_test, Y_test): + + self.loss_ensemble = np.average(list(map(lambda a,b : np.absolute(a-b), self.predict(X_test),Y_test))) + + # geeting yhe loss of each expert, and decidie which are eligible for next butch, otherwise give tau == 0 + # and decice which is replaced + prediction_allExpert_step = {} + loss_AllExpert_step = [] + for key,value in self.dic_expert.items(): + prediction_allExpert_step[key] = value[0].predict(X_test) + loss_AllExpert_step.append( np.average( list(map(lambda a,b : np.absolute(a-b), prediction_allExpert_step[key], Y_test)))) + + + loss_AllExpert_step.sort() #默认升序排列 + indexOfP = int(self.P*len(loss_AllExpert_step))-1 + thresholdOfEligible = loss_AllExpert_step[indexOfP] + + indexOfr = int((1-self.r)*len(loss_AllExpert_step)) + thresholdOfReplacement = loss_AllExpert_step[indexOfr] + # print(indexOfr) + # print("loss: ",loss_AllExpert_step) + # print('thre: ',thresholdOfReplacement) + # updating tau and puring + numToPurnd = [] + for key,value in self.dic_expert.items(): + loss_oneExpert = np.average(list(map(lambda a,b : np.absolute(a-b),prediction_allExpert_step[key],Y_test))) + if loss_oneExpert < thresholdOfEligible: + self.dic_expert[key][1] = 1/loss_oneExpert + else: + self.dic_expert[key][1] = 0 + if loss_oneExpert > thresholdOfReplacement: + numToPurnd.append(key) + + # print(numToPurnd) + for s in numToPurnd: + self.dic_expert.pop(s) + # print('# of expert', len(self.dic_expert)) + + #adding + while(len(self.dic_expert)< self.maxNumExpert): + if self.flagBoosting == 1: + boostX = [] + boostY = [] + while(len(boostY)<len(X_test)): + index_instance = random.randint(0,len(X_test)-1) + # getting loss of normalised instance + loss_instance = np.absolute( self.predict(X_test)[index_instance] - Y_test[index_instance]) + #determine the number of repeat of instances as his real loss + numBoosting = np.random.poisson(loss_instance/self.loss_ensemble) + # print(numBoosting) + for k in range(numBoosting): + boostX.append(X_test[index_instance]) + boostY.append(Y_test[index_instance]) + u_train, u_test, v_train, v_test = train_test_split(boostX , boostY, test_size=0.15, random_state=0) + else: + u_train, u_test, v_train, v_test = train_test_split(X_test , Y_test, test_size=0.15, random_state=0) + + self.dic_expert[self.index_New_expert]= [SingeHiddenLayer(u_test,v_test,self.num_neurs),self.tau,'NNRW'] + self.dic_expert[self.index_New_expert][0].train(u_test,v_test) + + lossOfv_test = np.average(list(map(lambda a,b : np.absolute(a-b), self.dic_expert[self.index_New_expert][0].predict(u_test),v_test))) + + if lossOfv_test < thresholdOfReplacement: + self.index_New_expert +=1 + else: + self.dic_expert.pop(self.index_New_expert) + + + +# X,Y =np.load("X_Y_RH2.npy") +# X,Y =np.load("HYPER10_Incremental.npy") +# X,Y =np.load("HYPER10_Gradual.npy") +# X,Y =np.load("HYPER10_Sudden.npy") +# X,Y =np.load("HYPER10_Reccouring.npy") + +# X,Y =np.load("Fried_Incremental.npy") +# X,Y =np.load("Fried_Gradual.npy") +# X,Y 
=np.load("Fried_Sudden.npy") +# X,Y =np.load("Fried_Reccouring.npy") + + +# tauxOfIni = 0.01 +# step = 100 +# X = list(X) +# Y= list(Y) + +# STD_X = MinMaxScaler().fit(X) +# X = list(STD_X.transform(X)) +# Y = [c/1 for c in Y] + +# numOfini = int(tauxOfIni*len(X)) +# X_train = X[:numOfini] +# X_test = X[numOfini:] +# Y_train = Y[:numOfini] +# Y_test = Y[numOfini:] + + +# myBNNRW = B_NNRW(X_train, Y_train) +# residual_carre = [] +# i = 0 +# while(i< len(Y_test)-step): +# residual_carre.append(myBNNRW.get_RMSE(X_test[i:i+step],Y_test[i:i+step])) +# # print(myBNNRW.get_RMSE(X_test[i:i+step],Y_test[i:i+step])) +# # test = myBNNRW.predict(X_test[i:i+step]) +# myBNNRW.partial_fit(X_test[i:i+step],Y_test[i:i+step]) +# i += step + +# print("the mean of RMSE by B-NNRW is" , np.average(residual_carre)) +# print("the Standard Deviation of RMSE is",np.std(residual_carre)) + +# import matplotlib.pyplot as plt +# plt.plot(range(len(residual_carre)), residual_carre, linestyle = ':', color = 'red', label = " B-NNRW") +# plt.legend(loc='best') +# plt.ylabel("RMSE") +# plt.xlabel('time') +# np.save('RMSE_B_NNRW.npy',residual_carre) + + + + + + \ No newline at end of file diff --git a/Class_affiche_bar.py b/Class_affiche_bar.py new file mode 100644 index 0000000000000000000000000000000000000000..467f854c47516b0554ec80e89cc0d760fd86a419 --- /dev/null +++ b/Class_affiche_bar.py @@ -0,0 +1,24 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Mon Mar 30 14:30:02 2020 + +@author: taopeng +""" + + + + +''' +print 中的 end = '\r' 是一个转义符,作用是让光标重新回到首行 +默认是'\n'换行符,windows系统下如果设置后不改回默认值会一直有效,不懂为什么 +''' + + + +import sys,time +def bar( words, i ): + sys.stdout.write('\r'+words+'[%s%%]'%(i+1)) + sys.stdout.flush() + time.sleep(0.1) + \ No newline at end of file diff --git a/Generator_dataSet.py b/Generator_dataSet.py new file mode 100644 index 0000000000000000000000000000000000000000..8bcd9515d9501fd6f2d1dc1c57dbbe49fcffea71 --- /dev/null +++ b/Generator_dataSet.py @@ -0,0 +1,172 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Sat Mar 28 17:40:55 2020 + +输入读取不带 噪音的文件 污染方式 污染比利 X, Y 是否归一化处理 + +处理 对 X归一化处理 + +输出 干净的启动数据 +输出 干净的batch数据,被污染的batch数据 + + +@author: taopeng +""" +import numpy as np +import random +from sklearn.preprocessing import MinMaxScaler +from sklearn.model_selection import train_test_split + + + +# X,Y =np.load("X_Y_RH2.npy") +# X,Y =np.load("HYPER10_Incremental.npy") +# X,Y =np.load("HYPER10_Gradual.npy") +# X,Y =np.load("HYPER10_Sudden.npy") +# X,Y =np.load("Fried_Incremental.npy") +# X,Y =np.load("Fried_Gradual.npy") +# X,Y =np.load("Fried_Sudden.npy") +# X,Y =np.load("Fried_Reccouring.npy") + +class Scaling(): + def fit(self, U): + self.maxU = np.max(U) + self.minU = np.min(U) + + def transform(self , X): + return [(c - self.minU)/(self.maxU - self.minU) for c in X ] + + def inverse_transform(self , X): + return [ c*(self.maxU - self.minU)+self.minU for c in X ] + + +class genenrateur_dataSet(): + def __init__(self, + typePollution = 'GN', + porcentageIni = 0.05, + step = 100, + ScalingX = 'yes', + ScalingY = 'yes'): + self.ScalingX = ScalingX + self.ScalingY = ScalingY + self.porcentageIni = porcentageIni + self.step = step + + + def readFile(self,file): + X,Y = np.load(file,allow_pickle=True) + X = list(X) + Y = list(Y) + if self.ScalingX == 'yes': + STD_X = MinMaxScaler().fit(X) + self.X = list(STD_X.transform(X)) + if self.ScalingY == 'yes': + myS = Scaling() + myS.fit(Y) + self.Y = myS.transform(Y) + self.index = int(self.porcentageIni * len(self.X)) + + def 
getIniDatas(self, myType, pourcentage): + numOfIni = int(self.porcentageIni * len(self.X)) + + marque = np.random.randint(100, size = numOfIni) + marque[marque > pourcentage] = 100 + marque[marque <= pourcentage] = 0 + marque = [ c/ 100 for c in marque ] + self.mean = np.average(self.Y) + self.std = np.std(self.Y) + + Traget_Y = [] + for i in range(numOfIni): + # print(i) + if marque[i] == 0: + Traget_Y.append( + self.Y[i] + + np.random.normal(loc= 0 , scale = self.std) + ) + else: + # print(self.Y[i]) + Traget_Y.append(self.Y[i]) + + return [self.X[:numOfIni],self.Y[:numOfIni], marque ,Traget_Y] + + + def hasNext(self): + if self.index < len(self.Y) - self.step: + return True + else: + return False + + def getNextCleanBatch(self): + self.index += self.step + return [self.X[self.index - self.step : self.index ] , self.Y[self.index - self.step : self.index] ] + + def getNextPollutedBatch(self, myType, pourcentage): + marque = np.random.randint( 100, size = self.step) + marque[marque > pourcentage] = 100 + marque[marque <= pourcentage] = 0 + marque = [ c/ 100 for c in marque ] + + # 1 is clean data, 0 is dirty data + meanOfthisPatch = np.average( + self.Y[self.index - self.step : self.index] + ) + + self.index += self.step + Traget_Y = [] + for i in range(self.step): + if marque[i] == 0: + Traget_Y.append( + self.Y[self.index - self.step + i] + + np.random.normal(loc= self.mean,scale = self.std) + ) + else: + Traget_Y.append( self.Y[self.index - self.step + i] ) + + return [self.X[self.index - self.step : self.index ] , + Traget_Y, + marque, + self.Y[self.index - self.step : self.index]] + + + + + +myData = genenrateur_dataSet() +dataName = "MediaData/X_Y_RH2.npy" +# dataName = "MediaData/X_Y_RH5.npy" +# dataName = "MediaData/X_Y_RH6.npy" +# dataName = "MediaData/X_Y_RH8.npy" + +myData.readFile(file =dataName) +all_Y = [] + +# m = 0 时代表被污染了 +X_train,Y_train, m , Y_polluted = myData.getIniDatas('', 30) +all_Y = all_Y + Y_polluted + +while(myData.hasNext()): + x,py,m,cy = myData.getNextPollutedBatch('', 30) + all_Y = all_Y + py + + + + + + +# import matplotlib.pyplot as plt +# plt.plot(range(len(residual_carre)), residual_carre, linestyle = ':', color = 'red', label = " B-NNRW") +# plt.legend(loc='best') +# plt.ylabel("RMSE") +# plt.xlabel('time') +# np.save('RMSE_AddExp_onlineExpert.npy',residual_carre) + + + + + + + + + diff --git a/MediaData/DPT_AddExp_Score_DTOMX_Y_RH2.npy5.npy b/MediaData/DPT_AddExp_Score_DTOMX_Y_RH2.npy5.npy new file mode 100644 index 0000000000000000000000000000000000000000..552e793037a0711a0c7cb4a123467b0910627948 Binary files /dev/null and b/MediaData/DPT_AddExp_Score_DTOMX_Y_RH2.npy5.npy differ diff --git a/MediaData/DPT_B_NNRW_Score_DTOMX_Y_RH2.npy5.npy b/MediaData/DPT_B_NNRW_Score_DTOMX_Y_RH2.npy5.npy new file mode 100644 index 0000000000000000000000000000000000000000..dd60dc871d2bfd4b636f36b3d9bd1330de25991c Binary files /dev/null and b/MediaData/DPT_B_NNRW_Score_DTOMX_Y_RH2.npy5.npy differ diff --git a/MediaData/X_Y_RH2.npy b/MediaData/X_Y_RH2.npy new file mode 100644 index 0000000000000000000000000000000000000000..e6e7eeb4c81556d95296660ff167096bfd70f669 Binary files /dev/null and b/MediaData/X_Y_RH2.npy differ diff --git a/NNRW.py b/NNRW.py new file mode 100644 index 0000000000000000000000000000000000000000..3a62ac9518bab2890cb4b9c45545fc71386fd370 --- /dev/null +++ b/NNRW.py @@ -0,0 +1,102 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Tue Mar 24 17:09:55 2020 + +@author: taopeng +""" + + +import numpy as np +# from sklearn.datasets 
import load_iris   # dataset
+# from sklearn.model_selection import train_test_split   # dataset splitting helper
+from sklearn.preprocessing import StandardScaler   # data preprocessing
+
+
+
+class SingeHiddenLayer(object):
+
+    def __init__(self,X,y,num_hidden):
+        self.data_x = X   # store the training inputs (check that the training set is at least two-dimensional; this is x_train)
+        self.num_data = len(self.data_x)   # number of training samples
+        self.num_feature = len(self.data_x[0]);   # shape[] gives the size of a matrix dimension, e.g. with 120 rows and 4 columns shape[0]==120 and shape[1]==4
+        self.num_hidden = num_hidden;   # number of hidden-layer nodes
+
+        # randomly generate the input weights, uniform in (-1, 1), shape (num_feature, num_hidden)
+        self.w = np.random.uniform(-1, 1, (self.num_feature, self.num_hidden))
+
+        # randomly generate the biases, one bias per hidden node
+        for i in range(self.num_hidden):
+            b = np.random.uniform(-0.6, 0.6, (1, self.num_hidden))
+            self.first_b = b
+
+        # build the bias matrix: the bias row is stacked once per training sample (num_data rows, num_hidden columns)
+        for i in range(self.num_data-1):
+            b = np.row_stack((b, self.first_b))   # row_stack appends rows to the array
+        self.b = b
+    # the sigmoid activation function
+    def sigmoid(self,x):
+        return 1.0 / (1 + np.exp(-x))
+
+    def train(self,x_train,y_train):
+        mul = np.dot(self.data_x, self.w)   # inputs times input weights
+        add = mul + self.b   # add the biases
+        H = self.sigmoid(add)   # activation function
+
+        H_ = np.linalg.pinv(H)   # Moore-Penrose pseudo-inverse
+        #print(type(H_.shape))
+
+        self.out_w = np.dot(H_, y_train)   # solve for the output weights
+
+    def predict(self,x_test):
+        self.t_data = x_test   # test data set
+        self.num_tdata = len(self.t_data)   # number of test samples
+
+        b = self.first_b
+
+        # expand the bias matrix so it has one row per test sample
+        for i in range(self.num_tdata-1):
+            b = np.row_stack((b, self.first_b))   # append rows
+
+        # prediction
+        self.pred_Y = np.dot(self.sigmoid(np.dot(self.t_data,self.w)+b),self.out_w)
+        return self.pred_Y
+
+    def score(self,y_test):
+        MSE = np.average(list(map(lambda a,b : (a-b)**2, self.pred_Y, y_test)))
+        print( MSE**0.5)
+
+
+stdsc = StandardScaler()   # StandardScaler computes the mean and standard deviation on the training set so the same scaling can be applied to the test set later
+
+
+# x,y = stdsc.fit_transform([x,y]),iris.target   # data normalisation
+
+# X,Y =np.load("X_Y_RH2.npy")
+# # X,Y =np.load("HYPER10_Incremental.npy")
+# # X,Y =np.load("HYPER10_Gradual.npy")
+# # X,Y =np.load("HYPER10_Sudden.npy")
+# # X,Y =np.load("Fried_Incremental.npy")
+# # X,Y =np.load("Fried_Gradual.npy")
+# # X,Y =np.load("Fried_Sudden.npy")
+# # X,Y =np.load("Fried_Reccouring.npy")
+
+# from sklearn.preprocessing import MinMaxScaler
+
+# X = list(X)
+# Y= list(Y)
+
+# STD_X = MinMaxScaler().fit(X)
+# X = list(STD_X.transform(X))
+
+# tauxOfIni = 0.1
+# numOfini = int(tauxOfIni*len(X))
+# X_train = X[:numOfini]
+# X_test = X[numOfini:]
+# Y_train = Y[:numOfini]
+# Y_test = Y[numOfini:]
+
+# ELM = SingeHiddenLayer(X_train,Y_train,12)   # training data, training labels, number of hidden-layer nodes
+# ELM.train(X_train,Y_train)
+# ELM.predict(X_test)
+# ELM.score(Y_test)
diff --git a/Reading.py b/Reading.py
deleted file mode 100755
index d3c13aeae9b22ebe2546f750183235d82d6f9daa..0000000000000000000000000000000000000000
--- a/Reading.py
+++ /dev/null
@@ -1,49 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Fri Jun 19 17:24:31 2020
-
-@author: taopeng
-"""
-
-import xlrd
-
-
-def excel_data(file):
-    # 打开Excel文件读取数据
-    data = xlrd.open_workbook(file)
-    # 获取第一个工作表
-    table = data.sheet_by_index(3)
-    # 获取行数
-    nrows = table.nrows
-    # 获取列数
-    ncols = table.ncols
-    # 定义excel_list
-    excel_list = []
-    for row in range(1, nrows):
-        myRow = []
-        for col in range(ncols):
-            # 获取单元格数据
-            cell_value = table.cell(row, col).value
-            # 把数据追加到excel_list中
-            myRow.append(cell_value)
-        excel_list.append(myRow)
-    return excel_list
-
-
-mylist = excel_data('ourExcel.xlsx')
-for i in range(0,len(mylist)):
-    if i < 5:
-        print(mylist[i])
-
-
-
-
-# we select client whose 18 < age < 50, that means bd 1950-2002
-import time
-for i 
in range(0,len(mylist)): - if i < 5 and i >1: - # myTime = timeArray = time.strptime(int(mylist[i][5]), "%Y-%m-%d %H:%M:%S") - print(mylist[i]) - - diff --git a/__pycache__/Class_AddExp_online.cpython-37.pyc b/__pycache__/Class_AddExp_online.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..38804ed8c691a7e43053a6430b7429e1a3bac6d6 Binary files /dev/null and b/__pycache__/Class_AddExp_online.cpython-37.pyc differ diff --git a/__pycache__/Class_AddExp_online_RNN.cpython-37.pyc b/__pycache__/Class_AddExp_online_RNN.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fa7fa4d1ebb9db22b7b302eb6c643495194144f8 Binary files /dev/null and b/__pycache__/Class_AddExp_online_RNN.cpython-37.pyc differ diff --git a/__pycache__/Class_B_NNRW.cpython-37.pyc b/__pycache__/Class_B_NNRW.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..10b5051b7e876d926fe170e55f0fb5666ea612a6 Binary files /dev/null and b/__pycache__/Class_B_NNRW.cpython-37.pyc differ diff --git a/__pycache__/Class_affiche_bar.cpython-37.pyc b/__pycache__/Class_affiche_bar.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6c877a66592165a9933397f4f4d3ccf5f2bebdcc Binary files /dev/null and b/__pycache__/Class_affiche_bar.cpython-37.pyc differ diff --git a/__pycache__/Generator_dataSet.cpython-37.pyc b/__pycache__/Generator_dataSet.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..53eba9bb8a850febe7a52733d076d0d4ab564101 Binary files /dev/null and b/__pycache__/Generator_dataSet.cpython-37.pyc differ diff --git a/__pycache__/NNRW.cpython-37.pyc b/__pycache__/NNRW.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3a6b23013bd04a66b920902ecf8a7465c76cae09 Binary files /dev/null and b/__pycache__/NNRW.cpython-37.pyc differ diff --git a/diff_Ratio_DPT_A_DTOM.py b/diff_Ratio_DPT_A_DTOM.py new file mode 100755 index 0000000000000000000000000000000000000000..64951b64d6b3ceb9b9c4e1455584b18187d25bdc --- /dev/null +++ b/diff_Ratio_DPT_A_DTOM.py @@ -0,0 +1,222 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Thu May 7 20:43:29 2020 + +@author: taopeng +""" + + +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Sat Mar 28 21:57:40 2020 + +@author: taopeng +""" + +from Generator_dataSet import genenrateur_dataSet +import numpy as np +from Class_AddExp_online_RNN import AddExp +from Class_B_NNRW import B_NNRW +from scipy import stats +from Class_affiche_bar import bar +from sklearn.metrics import accuracy_score +from sklearn.metrics import recall_score +from sklearn.metrics import precision_score +from sklearn.metrics import balanced_accuracy_score +import time + + +def getDecision(value,Trsut_threshold, mean, std, degree): + score =stats.t.cdf(value, degree, loc=mean, scale=std) + if score > 0.5: + score = 1-score + if score < Trsut_threshold: + return 0 + else: + return 1 +def getScore(value,Trsut_threshold, mean, std, degree): + score =stats.t.cdf(value, degree, loc=mean, scale=std) + if score > 0.5: + score = 1-score + return score + + +''' +Preparing the data set + +''' +#for pourcentageSTND in [5,10,15,20,25]: + +for pourcentageSTND in [5]: + + print("pourcentageSTND ",pourcentageSTND) + + myData = genenrateur_dataSet() + dataName = "X_Y_RH2.npy" + + myData.readFile(file ="MediaData/" + dataName) + + + + ''' + Initialising the model + + ''' + + X_train,Y_train, marque , Y_polluted = myData.getIniDatas('',pourcentageSTND) 
# 30#被污染 + + myAddExp = AddExp(X_train, Y_train) + residual_train = list(map(lambda a,b : a-b, myAddExp.predict(X_train) , Y_train)) + residual_polluted_train = list(map(lambda a,b : a-b, myAddExp.predict(X_train) , Y_polluted)) + + mean_resiual_train = np.mean(residual_train) # 残差的均值 + std_resiual_train = np.std(residual_train) # 残差标准差 + + + ''' + Automatically select the trsut threshold in the initialization data or set it yourself + This is an option that is not in the article content + ''' +# #测试所有trust阈值 +# dic_thresholds = {} +# # dic_thresholds = [] +# for Trsut_threshold in list(np.arange(0.0,0.5,0.02)): +# # print (Trsut_threshold) +# # UT_train= UT[:numTrain]# 测试集需要的被污染标记 +# R = [] #当得分低于阈值,标记为1 视作信任值, 0不信任 +# for i in range(len(residual_polluted_train)): +# R.append(getDecision(residual_polluted_train[i],Trsut_threshold, mean_resiual_train , std_resiual_train, len(residual_polluted_train)-1 ) ) +# #计算表现 并存储 +# dic_thresholds[Trsut_threshold] = balanced_accuracy_score(marque,R) +# # dic_thresholds[Trsut_threshold] = accuracy_score(marque,R) +# # dic_thresholds[Trsut_threshold] = recall_score(marque,R) +# # dic_thresholds[Trsut_threshold] = precision_score(marque,R) +# myTrsut_threshold = max(dic_thresholds, key=dic_thresholds.get) # 返回最小值的key +# # myTrsut_threshold = 0.1 # 返回最小值的key + myTrsut_threshold = 0.12 + + #Viewing window + R_t = [] # + S_t = [] # + for i in range(len(residual_polluted_train)): + R_t.append(getDecision(residual_polluted_train[i],myTrsut_threshold, mean_resiual_train , std_resiual_train, len(residual_polluted_train)-1 ) ) + S_t.append(getScore(residual_polluted_train[i],myTrsut_threshold, mean_resiual_train , std_resiual_train, len(residual_polluted_train)-1 ) ) + + + + ''' + Start to enter the data stream + + ''' + performanceDPT_precision = [] +# performanceDPT_recall = [] +# performanceDPT_Accuracy = [] +# performanceDPT_balancedAccuracy = [] +# MSE = [] + + + + listScoreNonTrsut = [] + listScoreNonTrsutIndex = [] + listScoreTrut = [] + listScoreTrutIndex = [] + + performanceDPT_time_pred = [] + performanceDPT_time_update = [] + + + index = 0 + limit = 0 # + + while(myData.hasNext()): + bar('index of batch: ',limit) + # x,y = myData.getNextCleanBatch() + x,polluted_y,m,clean_y = myData.getNextPollutedBatch('',pourcentageSTND) + m =list(m) + # residual_batch = myAddExp.get_listResidualAbu(x,polluted_y) # 这里看起来也不需要绝对值 + start_pred = time.process_time()# Heure de début d'enregistrement记录开始时间 + + residual_batch = myAddExp.get_listResidual(x,polluted_y) # 这里看起来也不需要绝对值 + + R = [] # Record Determination + R_eligible = [] # 纪录条件加权或松懈之后的判定,可以用来决定升级 + ScoreList = []# 纪录得分 + for i in range(len(residual_batch)): + ScoreList.append(getScore(residual_batch[i],myTrsut_threshold, mean_resiual_train , std_resiual_train, len(residual_batch)-1 )) + R.append(getDecision(residual_batch[i],myTrsut_threshold, mean_resiual_train , std_resiual_train, len(residual_batch)-1 ) ) + R_eligible.append(getDecision(residual_batch[i],myTrsut_threshold+0.3, mean_resiual_train , std_resiual_train, len(residual_batch)-1 ) ) + + # if len(m) != 100 or len(R)!= 100: + # print("my warming \!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!") + # print(len(m)) + + # performanceDPT_precision.append(precision_score(m,R) ) + # print(limit , "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!") + # pre = precision_score(m,R) + # if pre == 0: + # print("my warming") + # print(R) + # print(m) + # print(myAddExp.predict(x)) + # print(clean_y) + # print(myAddExp.get_listResidualAbu(x,polluted_y)) + # print(pre) + + 
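+        # (trust-score construction used above: getScore returns the smaller tail
+        #  probability of a batch residual under a Student-t distribution centred and
+        #  scaled by the mean and standard deviation of the clean start-up residuals,
+        #  so values near 0 flag suspicious targets; getDecision marks a point as
+        #  untrusted (0) when that score falls below myTrsut_threshold)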
+ # performanceDPT_recall.append( recall_score(m,R) ) + # performanceDPT_Accuracy.append(accuracy_score(m,R) ) +# performanceDPT_balancedAccuracy.append( balanced_accuracy_score(m,R) ) +# MSE.append( myAddExp.get_RMSE(x,clean_y) ) + + limit += 1 + + performanceDPT_time_pred.append(round(time.process_time()-start_pred,3)) #记录时间 + bar('performanceDPT_time_pred (average): ', np.mean(performanceDPT_time_pred)) + performanceDPT_time_pred_average = np.mean(performanceDPT_time_pred) + + start_update = time.process_time() + + # Calculate the median, and first quartile, to help decide which can be upgraded + myMedian = np.median(ScoreList) + Quanlite = np.percentile(ScoreList, 25) + + + + x_eligible = [] + y_eligible = [] + for i in range(len(R)): + # if R_eligible[i] == 1: + if ScoreList[i] > myMedian: + # if ScoreList[i] > Quanlite: + x_eligible.append(x[i]) + y_eligible.append(polluted_y[i]) + + for o in range(len(ScoreList)): + if m[o] ==1: + listScoreTrut.append(ScoreList[o]) + listScoreTrutIndex.append(index) + else: + listScoreNonTrsut.append(ScoreList[o]) + listScoreNonTrsutIndex.append(index) + index +=1 + + + performanceDPT_time_update.append(round(time.process_time()-start_update,3)) #记录时间 + bar('performanceDPT_time_update (average): ', np.mean(performanceDPT_time_update)) + performanceDPT_time_update_average = np.mean(performanceDPT_time_update) + +# +# print("perciosn: the mean of performanceDPT by AddExp is" , np.average(performanceDPT_precision)) +# print(" perciosn: perciosn: the Standard Deviation of performanceDPT is",np.std(performanceDPT_precision)) +# print("recall: the mean of performanceDPT by AddExp is" , np.average(performanceDPT_recall)) +# print(" recall: the Standard Deviation of performanceDPT is",np.std(performanceDPT_recall)) +# print("Acc: the mean of performanceDPT by AddExp is" , np.average(performanceDPT_Accuracy)) +# print(" Acc: the Standard Deviation of performanceDPT is",np.std(performanceDPT_Accuracy)) +# print("Bal_Acc: the mean of performanceDPT by AddExp is" , np.average(performanceDPT_balancedAccuracy)) +# print(" Bal_Acc: the Standard Deviation of performanceDPT is",np.std(performanceDPT_balancedAccuracy)) +# + + + np.save("MediaData/" +'DPT_AddExp_Score_DTOM'+dataName+str(pourcentageSTND), [[myTrsut_threshold], listScoreTrut, listScoreTrutIndex, listScoreNonTrsut,listScoreNonTrsutIndex]) diff --git a/diff_Ratio_DPT_B_NNRW.py b/diff_Ratio_DPT_B_NNRW.py new file mode 100755 index 0000000000000000000000000000000000000000..7ee2fb87857080119e4655552ee58acc9d0243d3 --- /dev/null +++ b/diff_Ratio_DPT_B_NNRW.py @@ -0,0 +1,173 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Sat Mar 28 21:57:40 2020 + +@author: taopeng +""" + +from Generator_dataSet import genenrateur_dataSet +import numpy as np +from Class_AddExp_online import AddExp +from Class_B_NNRW import B_NNRW +from scipy import stats +from Class_affiche_bar import bar +from sklearn.metrics import accuracy_score +from sklearn.metrics import recall_score +from sklearn.metrics import precision_score +from sklearn.metrics import balanced_accuracy_score + + + +def getDecision(value,Trsut_threshold, mean, std, degree): + score =stats.t.cdf(value, degree, loc=mean, scale=std) + if score > 0.5: + score = 1-score + if score < Trsut_threshold: + return 0 + else: + return 1 +def getScore(value,Trsut_threshold, mean, std, degree): + score =stats.t.cdf(value, degree, loc=mean, scale=std) + if score > 0.5: + score = 1-score + return score + + +''' +准备数据集 +''' + +#for pourcentageSTND in 
[5,10,15,20,25,30,35,40,45,50,55,60,65]: +for pourcentageSTND in [5]: + + print("Percentage of polluted : pourcentageSTND ",pourcentageSTND) + + myData = genenrateur_dataSet() + dataName = "X_Y_RH2.npy" + myData.readFile(file ="MediaData/" + dataName) + + + ''' + Initialising the model + ''' + X_train,Y_train, marque , Y_polluted = myData.getIniDatas('',pourcentageSTND) + myB_NNRW = B_NNRW(X_train, Y_train) + #Obtaining residual information at initialisation + residual_train = myB_NNRW.get_listResidual(X_train,Y_train) + residual_polluted_train = myB_NNRW.get_listResidual(X_train,Y_polluted)# + + mean_resiual_train = np.mean(residual_train) # + std_resiual_train = np.std(residual_train) # + + + ''' + Automatically select the trsut threshold in the initialization data or set it yourself + This is an option that is not in the article content + ''' +# #测试所有trust阈值 +# dic_thresholds = {} +# # dic_thresholds = [] +# print('optimization of myTrsut_threshold') +# for Trsut_threshold in list(np.arange(0.0,0.5,0.02)): +# # print (Trsut_threshold) +# R = [] #When the score is below the threshold, mark 1 as a trust value, 0 as no trust +# for i in range(len(residual_polluted_train)): +# R.append(getDecision(residual_polluted_train[i],Trsut_threshold, mean_resiual_train , std_resiual_train, len(residual_polluted_train)-1 ) ) +# #Calculate performance and store +# dic_thresholds[Trsut_threshold] = balanced_accuracy_score(marque,R) +# # dic_thresholds[Trsut_threshold] = accuracy_score(marque,R) +# # dic_thresholds[Trsut_threshold] = recall_score(marque,R) +# # dic_thresholds[Trsut_threshold] = precision_score(marque,R) +# myTrsut_threshold = max(dic_thresholds, key=dic_thresholds.get) # sitting auto +## myTrsut_threshold = 0.12 # sitting by hand + myTrsut_threshold = 0.05 # 返回最小值的key +# print('fin of optimization of myTrsut_threshold') + + + #Viewing window + R_t = [] # + S_t = [] # + for i in range(len(residual_polluted_train)): + R_t.append(getDecision(residual_polluted_train[i],myTrsut_threshold, mean_resiual_train , std_resiual_train, len(residual_polluted_train)-1 ) ) + S_t.append(getScore(residual_polluted_train[i],myTrsut_threshold, mean_resiual_train , std_resiual_train, len(residual_polluted_train)-1 ) ) + + + + ''' + Start to enter the data stream + ''' + performanceDPT_precision = [] +# performanceDPT_recall = [] +# performanceDPT_Accuracy = [] +# performanceDPT_balancedAccuracy = [] + + + num = 0 # + + listScoreNonTrsut = [] + listScoreNonTrsutIndex = [] + listScoreTrut = [] + listScoreTrutIndex = [] + index = 0 + while(myData.hasNext()): + bar('index of batch: ',num) + x,polluted_y,m,clean_y = myData.getNextPollutedBatch('',pourcentageSTND) + residual_batch = myB_NNRW.get_listResidual(x,polluted_y) + R = [] # 纪录判定 + R_eligible = [] # 纪录条件加权或松懈之后的判定,可以用来决定升级 + ScoreList = []# 纪录得分 + for i in range(len(residual_batch)): + ScoreList.append(getScore(residual_batch[i],myTrsut_threshold, mean_resiual_train , std_resiual_train, len(residual_batch)-1 )) + R.append(getDecision(residual_batch[i],myTrsut_threshold, mean_resiual_train , std_resiual_train, len(residual_batch)-1 ) ) + R_eligible.append(getDecision(residual_batch[i],myTrsut_threshold+0.3, mean_resiual_train , std_resiual_train, len(residual_batch)-1 ) ) + + + performanceDPT_precision.append( precision_score(m,R) ) +# performanceDPT_recall.append( recall_score(m,R) ) +# performanceDPT_Accuracy.append(accuracy_score(m,R) ) +# performanceDPT_balancedAccuracy.append( balanced_accuracy_score(m,R) ) + num += 1 + + # Calculate the median, and 
first quartile, to help decide which can be upgraded + myMedian = np.median(ScoreList) + Quanlite = np.percentile(ScoreList, 25) + + # Scoring of statistical contaminated data and uncontaminated data + + for o in range(len(ScoreList)): + if m[o] ==1: + listScoreTrut.append(ScoreList[o]) + listScoreTrutIndex.append(index) + else: + listScoreNonTrsut.append(ScoreList[o]) + listScoreNonTrsutIndex.append(index) + index +=1 + + x_eligible = [] + y_eligible = [] + for i in range(len(R)): + # if R_eligible[i] == 1: + if ScoreList[i] > myMedian: + # if ScoreList[i] > Quanlite: + x_eligible.append(x[i]) + y_eligible.append(polluted_y[i]) + + + +# print("perciosn: the mean of performanceDPT by BNNRW is" , np.average(performanceDPT_precision)) +# print(" perciosn: perciosn: the Standard Deviation of performanceDPT is",np.std(performanceDPT_precision)) +# print("recall: the mean of performanceDPT by AddExp is" , np.average(performanceDPT_recall)) +# print(" recall: the Standard Deviation of performanceDPT is",np.std(performanceDPT_recall)) +# print("Acc: the mean of performanceDPT by AddExp is" , np.average(performanceDPT_Accuracy)) +# print(" Acc: the Standard Deviation of performanceDPT is",np.std(performanceDPT_Accuracy)) +# print("Bal_Acc: the mean of performanceDPT by AddExp is" , np.average(performanceDPT_balancedAccuracy)) +# print(" Bal_Acc: the Standard Deviation of performanceDPT is",np.std(performanceDPT_balancedAccuracy)) + + + import matplotlib.pyplot as plt + plt.plot(range(len(performanceDPT_balancedAccuracy)), performanceDPT_balancedAccuracy, linestyle = ':', color = 'red', label = " B_NNRW") + plt.legend(loc='best') + plt.ylabel("DPT") + plt.xlabel('time') + np.save("MediaData/" +'DPT_B_NNRW_Score_DTOM'+dataName+str(pourcentageSTND), [[myTrsut_threshold], listScoreTrut, listScoreTrutIndex, listScoreNonTrsut,listScoreNonTrsutIndex]) diff --git a/get_resultat.py b/get_resultat.py new file mode 100755 index 0000000000000000000000000000000000000000..964fd91bf79e91ef73e04cb76b4ad4d475e87223 --- /dev/null +++ b/get_resultat.py @@ -0,0 +1,104 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Thu Sep 17 15:53:52 2020 + +@author: taopeng +""" + + +import numpy as np +import matplotlib.pyplot as plt +#RatioList = [5,10,15,20,25,30,35,40,45,50,55,60,65] +RatioList = [5] + + +def cal_RES(fileName, ratioList,color,typeLine): + list_RMSE = [] + list_RMSE_T = [] + list_RMSE_NT = [] + list_Score_T = [] + list_Score_NT = [] + + + list_TPR = [] + list_TNR = [] + list_ACC = [] + + for i in ratioList: + threshold, T, T_index,NT,NT_index = np.load(fileName+str(i)+'.npy', + allow_pickle=True) + T[:] = [x * 2 for x in T] + NT[:] = [x * 2 for x in NT] + temp_Rmse = [] + temp_Rmse_T = [] + temp_Rmse_NT = [] + for o in T: + temp_Rmse.append((o-1)**2) + temp_Rmse_T.append((o-1)**2) + for k in NT: + temp_Rmse.append((k-0)**2) + temp_Rmse_NT.append((k-0)**2) + list_RMSE.append( np.average(temp_Rmse)**0.5 ) + list_RMSE_T.append(np.average(temp_Rmse_T)**0.5) + list_RMSE_NT.append(np.average(temp_Rmse_NT)**0.5) + list_Score_T.append(np.average(T)) + list_Score_NT.append(np.average(NT)) + + + + tem_TPR = 0 + tem_TNR = 0 + for i in T: + if i> threshold: + tem_TPR += 1 + list_TPR.append(tem_TPR/len(T)) + for k in NT: + if k < threshold: + tem_TNR += 1 + list_TNR.append(tem_TNR/len(NT)) + list_ACC.append( (tem_TNR+ tem_TPR)/(len(T)+len(NT))) + list_BACC = list(map(lambda x,y: (x + y)/2, list_TNR,list_TPR)) + + + res = {} + res['RMSE'] = list_RMSE + res['RMSE of ODs trust score'] = list_RMSE_T + 
res['RMSE of SUTDs trust score'] = list_RMSE_NT + res['Score_OD'] = list_Score_T + res['Score_SUTD'] = list_Score_NT + + res['TPR'] = list_TPR + res['TNR'] = list_TNR + res['ACC'] = list_ACC + res['BACC'] = list_BACC + + res['color'] = color + res['typeLine'] = typeLine + + return res + + + +B_DTOM = cal_RES("MediaData/DPT_B_NNRW_Score_DTOMX_Y_RH2.npy", RatioList,'blue','-') +A_DTOM = cal_RES("MediaData/DPT_AddExp_Score_DTOMX_Y_RH2.npy", RatioList,'red','-') + + + + +Attribu_Name = 'RMSE of ODs trust score' +print(Attribu_Name, 'BNNRW-DTOM') +print(B_DTOM[Attribu_Name]) +print(Attribu_Name, 'AddExp-DTOM') +print(A_DTOM[Attribu_Name]) + + +Attribu_Name = 'RMSE of SUTDs trust score' +print(Attribu_Name, 'BNNRW-DTOM') +print(B_DTOM[Attribu_Name]) +print(Attribu_Name, 'AddExp-DTOM') +print(A_DTOM[Attribu_Name]) + + + + \ No newline at end of file
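The commented-out driver blocks above all follow the same test-then-train pattern. Below is a minimal sketch of how the pieces added by this diff fit together, assuming MediaData/X_Y_RH2.npy is available; the roughly 5% pollution ratio and the choice of the AddExp ensemble (rather than B_NNRW) are illustrative, and importing Generator_dataSet also executes the small example at the bottom of that module.

# Illustrative sketch (not part of the patch): score each incoming batch on its
# clean targets, then update the ensemble on the possibly polluted targets.
import numpy as np
from Generator_dataSet import genenrateur_dataSet   # note: importing also runs the module-level example
from Class_AddExp_online import AddExp

stream = genenrateur_dataSet()                       # defaults: 5% warm-up, batches of 100
stream.readFile(file="MediaData/X_Y_RH2.npy")

# getIniDatas returns the clean X and Y, the clean/polluted marks (1 = clean, 0 = polluted)
# and the polluted targets for the warm-up slice
X_train, Y_train, marks, Y_polluted = stream.getIniDatas('', 5)
model = AddExp(X_train, Y_train)

rmse_per_batch = []
while stream.hasNext():
    x, y_polluted, marks, y_clean = stream.getNextPollutedBatch('', 5)
    rmse_per_batch.append(model.get_RMSE(x, y_clean))   # evaluate on the clean targets
    model.partial_fit(x, y_polluted)                     # then train on the polluted batch

print("mean RMSE:", np.average(rmse_per_batch), "std:", np.std(rmse_per_batch))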