From 4513c343f4355c271cf4095905372846d6f315b4 Mon Sep 17 00:00:00 2001 From: Baptiste Bauvin <baptiste.bauvin@lis-lab.fr> Date: Mon, 20 Jan 2020 14:37:29 +0100 Subject: [PATCH] debugging --- .idea/.gitignore | 3 + .idea/misc.xml | 7 + .idea/modules.xml | 8 + .idea/multiview_generator.iml | 9 + .idea/vcs.xml | 6 + __init__.py | 3 + demo/__init__.py | 1 + .../multiviews_datasets.cpython-36.pyc | Bin 0 -> 7067 bytes .../__pycache__/parameters.cpython-36.pyc | Bin 0 -> 1320 bytes generator/multiviews_datasets.py | 8 +- {demo => generator}/result.py | 3 +- .../test_classifier.cpython-36.pyc | Bin 0 -> 34322 bytes generator/use_generator_baptiste.py | 41 + ...ltiviews_datasets_generator.cpython-36.pyc | Bin 0 -> 7364 bytes late/execute.py | 35 + late/multiviews_datasets_generator.py | 203 +++ late/parameters.py | 60 + late/test_generator.py | 1140 +++++++++++++++++ 18 files changed, 1521 insertions(+), 6 deletions(-) create mode 100644 .idea/.gitignore create mode 100644 .idea/misc.xml create mode 100644 .idea/modules.xml create mode 100644 .idea/multiview_generator.iml create mode 100644 .idea/vcs.xml create mode 100644 __init__.py create mode 100644 demo/__init__.py create mode 100644 generator/__pycache__/multiviews_datasets.cpython-36.pyc create mode 100644 generator/__pycache__/parameters.cpython-36.pyc rename {demo => generator}/result.py (94%) create mode 100644 generator/tests/__pycache__/test_classifier.cpython-36.pyc create mode 100644 generator/use_generator_baptiste.py create mode 100644 late/__pycache__/multiviews_datasets_generator.cpython-36.pyc create mode 100644 late/execute.py create mode 100644 late/multiviews_datasets_generator.py create mode 100644 late/parameters.py create mode 100644 late/test_generator.py diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..0e40fe8 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,3 @@ + +# Default ignored files +/workspace.xml \ No newline at end of file diff --git a/.idea/misc.xml 
b/.idea/misc.xml new file mode 100644 index 0000000..3c2c013 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,7 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project version="4"> + <component name="JavaScriptSettings"> + <option name="languageLevel" value="ES6" /> + </component> + <component name="ProjectRootManager" version="2" languageLevel="JDK_12" default="false" project-jdk-name="Python 3.6 (develop)" project-jdk-type="Python SDK" /> +</project> \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..6164328 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project version="4"> + <component name="ProjectModuleManager"> + <modules> + <module fileurl="file://$PROJECT_DIR$/.idea/multiview_generator.iml" filepath="$PROJECT_DIR$/.idea/multiview_generator.iml" /> + </modules> + </component> +</project> \ No newline at end of file diff --git a/.idea/multiview_generator.iml b/.idea/multiview_generator.iml new file mode 100644 index 0000000..d6ebd48 --- /dev/null +++ b/.idea/multiview_generator.iml @@ -0,0 +1,9 @@ +<?xml version="1.0" encoding="UTF-8"?> +<module type="JAVA_MODULE" version="4"> + <component name="NewModuleRootManager" inherit-compiler-output="true"> + <exclude-output /> + <content url="file://$MODULE_DIR$" /> + <orderEntry type="inheritedJdk" /> + <orderEntry type="sourceFolder" forTests="false" /> + </component> +</module> \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..35eb1dd --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project version="4"> + <component name="VcsDirectoryMappings"> + <mapping directory="" vcs="Git" /> + </component> +</project> \ No newline at end of file diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000..bfd71d1 --- /dev/null +++ b/__init__.py @@ -0,0 +1,3 @@ +from . import generator +from . 
import demo + diff --git a/demo/__init__.py b/demo/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/demo/__init__.py @@ -0,0 +1 @@ + diff --git a/generator/__pycache__/multiviews_datasets.cpython-36.pyc b/generator/__pycache__/multiviews_datasets.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a9932a1bb71068d596d0df24e283e78348088be9 GIT binary patch literal 7067 zcmXr!<>i_-Q#Ed*JOjgH1|-1Dz`)?Zz`#(P#K6Fi!Vtxf!kEI8!<fqy#l#3=Gv_ep zvP7|f*(^D%xolBvx$IHwU_NUOM-*oYV+v~yR}^;&TMBy%LljR6M+#>PLlkcce+qXB zPYYudUkYyuUkgJNe+p|bgQh^0C6{wiYGO%hib8&#LP%+<f?s}_f{~emp{bRzg_W^| zf{}rtB^Q@NVrfZ6evy?zQfg6NVo^$J3D--IyEPea3Fjter^YAeBo-HEre!84mSpDV zX)@koO;0Q>F4kna#gd$wnRAOZEhj&}$S;`*q>F)pfs28Gft7)Q!5QS+DGUq@B@8tT z&5X57C5$zUB}^c;2#C*I!(79Z%}}IL!&J*$!(7Xf!dS~%kyOH*!c@WnQq|1l!Vnu! z%T~is!<ND%!63;{%MM~oGSqUEu+}ivu!GIvtYNL;G-s$~tzoI*lwhdg$Yv;NsbQ|+ zfbb<4%o#xD*RV=3)Up)XgfrAI2Qz3gCrU9gFmNd-C@2J_mXsFd6)Tivq$=cQ=4Iw4 z<|w3O=BDNqXXfWA<fkd*B$lM+l_(S!BqpaSXr$)lm!@YZl;kTUm1gFofVHJ0mLwLZ zmS`%Z<rgWWr{<*=C6?qD#pjmhlw_7=rj{4SLscjw=jRqA7Nw?u^h0G}Ru#wRWEPiz zodhNV5{nXZQ%h2dimkwWT?7F0v1(VaQh=Khq+q3xmXn_dF%#rfus*1NQ4IylB<3k( z=0Q{!F)}bPyaW-NY`2(m6Dx{X7#J9CF&CHS-eN6E%u7$b#hjCxR|JY_O|B?Wtd6<G z7!)Oxn^+MKl8uLl=`Bt;GfJ>1HKjB;H3cCX#Z{D=n_re1UtF3S#gSK<3)UUQ2{xmk zC^b2X2h2<^Eh<XQE4jr6QNEI)h>wAR;a8}BMt*Lpeo|sVNoH|Ls=iBpa%paAUP-aO zduB;_eo?kQJXYf2F{O`S!DZobR;*W0c}onE2;#ws0O42xP$HKDrF>ovMgb-cMiwR( zMiwR!Mjl2EC>CJkVJZTtgXa)XP63q%;2Z*ysR5M$jIB&HjA=}g48aVVOo{&3QfEO? 
zepYHSC>tV@CB#LbJPt~~$@w{@xp~DP>Ew+3;?%r&s0cW1DQINoDPR-NFG@)*(gddg zFhMBkAqR$nl|o`sQDP-nBR0d4QY3L{5NsUC3-Dy3ppjRao0M7v5-iFu2m24%4LS-r zsd?!o86bHqPJsB#51c?WnQrkQYmE}dq5-TDLhyr<Jr^S6jX*IDia`e6A|X&7(PCg= zNag`00R{#J83qOhP%;EXZ?O&&149jC3S%}ykw6JU3R5#<6Jre%n6FU6kirb+GpDd* zGZYz=Fr=`8`5a)rLkU9)8-!oODbA3>D9w<<Uc)8AAi<CVD&P`I7*aSv@+FK}Oj)49 z4yvk#tA;U!tCzW!r-Y@3r<t*qC!Hakfr+7$v6i<YriRCbAvUCzuZ9m)OcpPz;j7`y zW+|G~!I;Hb!`IBn$dDpg%U_{Z!w;4}R>KdLzg5Ck!vS+g4OcT$Eq~#*aE2P*EY=!+ zkgmc>;SBAJX^h~ihy!FxGh+>}I72N@3U3Ne3j;)?mZyd%i#>%eg};}Pk)ejUgrkP1 zhNYQFgrQcTgtJDVhFuy|F$k7$r3j=5rZA>(q;RA#rm&<4rEsJO*KntZq=@$N#4yzg z)e4s|W^t#8fy^%9sS(QJO<|E_Xl8U_Xl86?tQART=wwJ|s1>a!E8(eO1c}FH)rx_` zimyhbhNnhMk|9MTMZA}}R=k6ugr!EjnbDk~lc9vaMyN)-nW<KyMl_4HMxsVMjS1vG zs7~<`ff^QwZcx<&H9@k3F-x#Uq(-utsa9kHV_`=)LpwtnV~RuyPYXwl2*^#fQYD-v zLN!v&j5Pw%3@O4Q3@MDwOtsQALN&rQqBYVrf@z>iTPkr5Ct9VHh^S5!5Ji&$v^oLj zIWR#e$L7TsC*~I9q!ue!!OM71ZiHc|feMIx22+nE>p>MmvQ-|qe1@8<U}vjf1a=V2 zU<H_4k&FhF-^FOE^7B#^K;jCS#R`cE#i=Eryj+~0o2rnOnpjd=lv=Ey0WHfkK~kX7 z4w8={6*JhlJV<K==1Xj@!R8)t6DPG;0c=K8d|G01Nq!Nm@&p-T2$n&3341#M-AM%6 z3Yo<U$%#41r8%J1Kt^gFBz_BWGE-9&k}4J8?f}~js>=-&Y-}MFI*5UAQ;>qKf}w(q zokCu|LP36Uab{9xPG(6ZD3n08YhFrXUUH>EQff(gYHA)hkP(3o(rW<Ho10hxZuyX6 z04TUii$N`v_=40TNZ2E#FL1EN7pE4Wg(%oVNP!7s!qqF-Dqu5O3$6faAlRV1_{_Yt z{G!~%lFYKy_>|1D%oMy)2eVB<K>?fAqQuPN)M8MGAhJSUex5FpC5RLWQ4DqllCelk zY&IYw3zxY%NO=XTy|%a;sQ4TMuS0PdT9jX!m!g5Fb<x|t`q)AYQVQS*bFA%YP&P-T zKosl1QlMr^i6*q6qEM8Wmy(~GQ>l<vl%K2Mr=gLe?V_n~q@z$=l9-p0Sd<c<l3JDt z?wrIKX)2UwmSiZTfHDXcgKcaTT)+Y3f}BKBz(Sx{gJl9tt#BonQgm=GrdMI^hr0{p zd=w1RqW}*cP=Np{qrm;Q%)~r}l+5CiqRgaHaEp>q$l|tXK!OD1SFn?CcW0tNP18I` z>lEa7xH~oAwV{qekQTH8(^Rkp_vN5nfz%WQXblLqr4pA}3JAksCS>N7q^749DdZ$3 zrREfa`-I?<OCdKkHz~EKI3u$F<eb#R<P4}CU@H>AJvWdH)I|y<rQjM{BO<;`N5M0` z44S$jJvUGQM8uaV*eXORK%1wjDGG@>`FZIIDe+|r8fb+CDCWy_LAe8*$ut$9wW&g8 z9>{F46F_Eqg3Sb(3{#L-nwwgbUku4wkW#TIGcO(F(kQqoP(@&4O7n_Kk-K!opz0wR z)Q7fGC`c^=u@ci$LHZ#o;fA0WIGM=R6-rsEU`42m#8ZyxqLyQz>IPmk=H-Jr!SD(N 
zUe-g3xtE}pe-#fbf<UdEDn3wMl~@d_H9-6#HU<WUDhqh^Z3imv3-U{g6mn7(lJfHu z((?1beYm_-P`^|mB~>9uJuxRezbLaLBR92*Ed|`-cnRu4fP3Lp#hy6|X^EvJ3WcSq z3ee(DL07?83zo$ZB_K$vfdbgJ#IpR%B88OHVuiBAoYc~yVo=*46I4kSE9B%C6+<je z%`M2u&jhm*6+o;!P*s|fs>ytdIVV5)7He^GVovHUmg3B+)MU_b0H~t_3Ixz_fH0^p zB*DPIkj_xU5Gzp2Si{)Ckj-AiQo}frsgNZY(j_kfb%->XZZYW@++qQxJx#`2+>lN? zxK|36F9P+mesS4=1}{KkBzA2evq2WIF;p2q^CZ?G6+N4r{N%)(Vmm#AW(=1GgG>{F zxs<1tv4jECFF<moCgUyU%)Am!rXooO28JRj5CL*{5tsnEt_ak*YX=zwaaM>X<1O~0 z)Uwo~;?$QXKwUx5(3juK6b1$cH%*>fobXcS7FS+8tQC8U7bP7>ap%Ru8fhg_Tp&&X zc+?K09b9Up7T*#9r7-k{)-Cq()XemZlHyxzX*rn%@s&}WVD+E|*DW4Mtr=fjl30>@ zi@i9bG%YO$H3C8D3mhz<5eRSua)2TbDM-LxhX%+k#!N65LcoJ#G9oxMnTkM30Td(! zIf==rmXVxVz_D<P4N?SYGT!3IO)W``_lz$?bHD<G1780B|Np<H@GY*e#GKMp*P^2Q zqFYRP1-IBBVR4HMlC*EJfYR74w#0&h)V!1;P%zwLDb6n{0T0sLVg)yXia=3(ixoT) zQN#%v0$?r9NGwRb#g?3rpP8Hr?)rlVzix3Br<SB-rllFC6oKMjQ>I7>q@EpGMii-n zcpR|epoklku+%_e0`Ta9HfL`Mqa+PTTlN-X$}L8hTSAz5y-1URfgy@HCB7_*BQYff zoR^|FQZgX}X1ACVQ&OT>LENY!6_9<5QMVW?qnJJ83qf&IT$)o-3{K`Cp|U9Ei1@N7 zq0}63p;R1Sk{=JrqETXyG8!@F1sSKjC5b5xE!)9Sq{&pI2lA>khybORTQc}^u@fjB zgFuCg9tR^TlwjmwmSW^!<YMGv6kz0HWMLFy6l3HAVJ03XJ|-R}E=CSUAtpIS7Dg6E zJ|;0nkO&`>9HS6eWswJHga(uXLB@k36EtWE9(?Np4^L+?g35wgmKv5UCeTovI713k z3Ue<LBSQ&87Gn)lGh;1l4J%lV1tFKhTFX}9QN!%Q5bIFO4pzyY!Yav7!=Az>$&kX{ z%T&uz!vWUCA<j_3T*KDPD9(@q8mEDpUc;8cX3kK{R_GMYkir$rpvj#$2R7^n8M{Xs z)<Sgt!4a*Xtze{(TwJD*mI><Am*j)nE}3~LpmF-t6orDsk_>QN>;{(4OUzAGK(rem z9cReUp&r<jd{A=*G|UYV1ZxGg2B2-3_#ALs2Cf!W7_J&rx*Nb1p*cC)AQr3;Ob{Bc z0J&emN};%<2&^30!Jw8He83UaAqrLshAxl>A6Ofj3!sDJpoRyeX$4jU@+O#pyCvzD zpO*?&QYC=w5|B^bKr<qlc`2zCRYIta0m*_ZaBzzNR9QeuGf1whVgbqfX|mj6DoBaq zbV)2pbOVj|N3oUU$0rw;-C`~-DbnP;#R3Y6B0*4A=R-9=iXY8;QT!=s@yPCr;sgtT zTA@+Q$ps}*T%ZVw2m3vWJtYl1as$q-5CWV{`HE7(Wdo=PNG>j03d)$EfldZtE=D0H z5N2WIViaSPV=VGxU|`T>ECMwHGzD(4$H%ASC&$M}2^44Nq$U>S=|RVdii<#{X%T4T z6k6QoCYEG?Clnw>C~ID6Zb4;{A;^<#1)yGCagjHu+yGTgkfIiphr#wiTnes~!35aN h95%W6DWy57cI+_MvM}*5@-Pa3iV|iHB@O|uWB~S@<=FrL literal 0 HcmV?d00001 diff --git 
a/generator/__pycache__/parameters.cpython-36.pyc b/generator/__pycache__/parameters.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fb1c18a3ebf4324a606544fb3ae873e4fb6bda16 GIT binary patch literal 1320 zcmXr!<>l&Mz!1mJ#lY~G0ST}(FfceUFfbIqU|?WKVTfW#VN7AlVa#QUVqyfbnRA$P zS)y3LY?d6>D7F;VDE1V#D2^2ND9#j)D6SOwDDD)_D4rDdDBcwID83Y~DE<`gD1lU_ z6rL2`UdB|GW`-2LIZRQ4Df}q{DXb}iDMIP2Q9`N0sccyyDU2z?DI(3x&5Ti^DO@RR zDV!;yDf}rMDXi)2QDP}#DdH^*sp2W@DV)trE)30#Q4%TaDH181>C91*DO@S+DQxM? zQBo<AQPL?=Q8FpgQL-tVQF1BlQSvE_Q3@$sQHm*CQA#N?DYEHIQOYTDQ7S1c!3>%T zRhC@NMX8A;sVNHic?uz=sS1AiWeP@S3Wmm3rp8tVrV2&|hL&7h4vD2D8Tmz43Q4I& zd5J|SsU=)5L7}I~c#AnW%vqE17MFWsX>oC8VxFIqU*-#D28Nf+3=9k}nLxxZ5Wxi^ z7#SECKstP**m5%SQWJ|b8Kc;;Q;YIab2OQXI6<;Zeoik_7#J8bzc4Z|WZq$AV930{ z&cKj<_6!Jouuq>g69iV;zm#NPU`Pg;2yzL?B9M*Fphy&8U|>jRs9}iZsAa5SoXAwj z63noYp@^4(fkBh$7L%Sq6kA?=S!QZESO`q~^3l)8&rQ`&N-QYJEG|jacgasK%}vcK zDb{z-EGf?~%GS><%_+$Q>55NJ%}XsxEXgm@N3aSKixP8FOHzx9^$IF~aoOZ#7MCRF z=N8y$f?Nc09vedu$YC#885kI<)S=eF!V}59;F84jR3Bq~m&B4p{VEN@3f+qm3o`Uy zvVeS@1_saU(~ZI4GAz7y+NXzt!Bcxp)m!ZG@hSPq@$pgI#o0NjiA8yO#bvogAV)?C z!$k5DGs{xrlM*Xai;F-Z8^xMenp;pA#gtcYi!(32I5D>%C$;z%J0v8FL1M`{iN(dK z#kV-3;?ok7OY)0uF$UdYN-n!4k{6$xQ(6oPo%n*(qIj^vTLNGzzBsi2rd&8LJ~J;Z zzbH4c1Y~(iW?5#+EiSOK%(TqZqFbT}c6>@^a!F=>USd(@Ek3w=;z8kliv>hRu_YEI z=B20JVhe(Z^1_@2;j%(xd7vJE2tq7CauY;Y0*k{SGMoj8B^mK4i6x1*xWLSGP;A^{ zOu5DAa!aVVBrz`~u_z@zCABOw5#-z`fr7-M5@c^i@qtB(Qd3IvQWEn@qIjYF0&vcZ zVlPk4OwTAOj$%v8$t;Mk1gBRdf*+J^IBatBQ%ZAE?bt!d&4z)2frXKWk%y6knS}`x f3Ni692{8#Vi7;_7ae?GHz-ri8m{^!uSUH#h3R#R* literal 0 HcmV?d00001 diff --git a/generator/multiviews_datasets.py b/generator/multiviews_datasets.py index 44babd9..00f1777 100644 --- a/generator/multiviews_datasets.py +++ b/generator/multiviews_datasets.py @@ -63,7 +63,7 @@ def projection(latent_space, chosen_columns_list): return latent_space[:, chosen_columns_list] -def generator_multiviews_dataset(n_samples, n_views, n_classes, Z_factor, R, n_clusters_per_class, class_sep_factor, 
n_informative_divid, d, D, standard_deviation): +def generator_multiviews_dataset(n_samples, n_views, n_classes, Z_factor, R, n_clusters_per_class, class_sep_factor, n_informative_divid, d, D, standard_deviation, random_state=42): """ Returns a generator multiviews dataset @@ -140,9 +140,9 @@ def generator_multiviews_dataset(n_samples, n_views, n_classes, Z_factor, R, n_c # Number of informative features n_informative = round(dim_Z/n_informative_divid) # Generation of latent space Z - Z, y = make_classification(n_samples=n_samples, n_features=dim_Z, n_informative=n_informative, n_redundant=0, - n_repeated=0, n_classes=n_classes, n_clusters_per_class=n_clusters_per_class, weights=None, - flip_y=0.01, class_sep=n_clusters_per_class*class_sep_factor, random_state=None) + Z, y = make_classification(n_samples=200, n_features=10, n_informative=2, n_redundant=0, + n_repeated=0, n_classes=2, n_clusters_per_class=1, weights=None, + flip_y=0, class_sep=100, random_state=random_state, shuffle=False) I_q = np.array([i for i in range(Z.shape[1])]) # 1D-array of Z columns numero meta_I_v = [] diff --git a/demo/result.py b/generator/result.py similarity index 94% rename from demo/result.py rename to generator/result.py index 9d2aac6..dfd27f5 100644 --- a/demo/result.py +++ b/generator/result.py @@ -5,10 +5,9 @@ Created on Wed Nov 27 16:14:14 2019 @author: bernardet """ - import parameters from multiviews_datasets import generator_multiviews_dataset, results_to_csv -from test_classifier import score_multiviews_n_samples, graph_comparaison_classifier_scores_n_samples, score_multiviews_R, score_multiviews_Z_factor, score_multiviews_n_views_R, score_multiviews_class_sep, score_one_multiview_dataset, score_multiviews_n_informative_divided +from tests.test_classifier import score_multiviews_n_samples, graph_comparaison_classifier_scores_n_samples, score_multiviews_R, score_multiviews_Z_factor, score_multiviews_n_views_R, score_multiviews_class_sep, score_one_multiview_dataset, 
score_multiviews_n_informative_divided import warnings warnings.simplefilter(action='ignore', category=FutureWarning) diff --git a/generator/tests/__pycache__/test_classifier.cpython-36.pyc b/generator/tests/__pycache__/test_classifier.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3d7f2cb640b14537fb387f740ce617f808f9ead6 GIT binary patch literal 34322 zcmXr!<>l&Mz!1l>mx19i0}|k4U|?`yU|=Y=V`5-PVTfW#VN7AlVa#QUV#;NXVrB&K znR8fjS)*9NY?d6hT=pn-Fq<`pBbPIZGnXrh3(RNB;m+lW;sLYSb9i(4qWHjUjvW46 zfhYkmn=?l+S13vd%;w4wjuJ^>OySBA%@T_e2eTxiBvZIkcv={uq*8cO_)=L@_)`RW z8B^Jt8Bzr2Fr_i42&D+OFh)sZlT8sx5ls<G5l_)f5d+f_DZD9SX?!V?DN-$rQ8HNd zKy+xK=-^F}PLXM0jFL^^MRx(z25l7eJSnm%axIKea#(GExeD1$yeaZ23N4IL@_2OU zpxD8iqL`x8!WgB1LkGy;$Tsk#D5t2jFh(h2RWF(%0uCWvFbN4K-W1gowHC%GCA>QH zz^;PnP*2flVT@8v;R<HZ)UUGSaxO|uEJ;mK$j?&<$tYFu%P&(fvQRKIu`)EVGBHyy zGBC8{;&MnVEy>6)vQkJ&Ey_zQN=Yr@dI?ICnoPH3(o^$NixNxni{f)jb4oJHGE>Wo z<5Ln#5{pwyZt)eR7MJFf6vvn3$0rw;X)@ko4i0nHWW2@Yo>*F3oSB&C=cLJWi$A$2 zzqmNQEHNj(I61#4^%j3{Nl{`+W?E)yinm*SPKqYuExyF$<kF(V<VuJVO~za7&iSQz zC8<T4jJMbvD^iP%T{IbQu@n~;mG~tyf}F>|z`)7Cz`)ADz`zX3GSe6s7)lsA7*ZG| z8JZbu8A}+mm{M5N8EY6tKzyclhBU?$<`k9|juK{&x@N{^Mi-C_a|vS>OAT`~V=YTX zKnZgVSUlFHmbHYnhNXrTY#Lh$TMb(ldoxHqdxcsJTNXz%g9}5fR4qphTMdUKLk)X2 zQ&B+;M-5XAs|`Z{Lt#NUL!nnVLk&|fgC<+mV=e^+1%;s0lG38QVui#!g~Xzw#7c$o zjLhT=Q0$~8CTA!V<(DgzWTYzOCT8UqWtLPbB<Ca+7c1nafklfGb5p@eGV>G)ic(WD zlS?x5^NQnhGK)*Vrh|!q#G=I9)RNSqVk<CT7XiS0be#%T3LpbOmKeG~Tm@DKaxRzw zbu+4IV41``1r#ksplEpsBH$6k#K6G7#=yWJ42mEX1_p+7h8l)gnOepUh8jjmhAf5> zD65$<g`t^gB2yttFoPyz5f1|cgC^50_M+6{jKqS}Tg*ABd74bOnDh*8F=lEq-V#9f z#!7}?;rbc*xvBa|i3KH@#U-iwF8Rr&xv6<2#rp1<CFS`=+4}G#91l;{`UqA@YH>-i zK8TD5hdU^J7U>mK{^GI$g-~*SZh@T|$R7|-RVl)v2^<4@HaYppi8;k~dI;4o|NsC0 zUz4*4RI1-%$}70V3Xb(#tVM}=>8ZEaGKxzQle3FJsrVLiZeqnPwz9;W($wNxEZM1* z#hRQ&f(#4{w>b0SixYDTa#D+<gyB{q{2wI&4WxL47$i|d$t36JB`21o<|US-V#wZN zgP3}YH5ruAqBwICvp{*YH1!s9c4}pj5GV>5!A>XwMc*x6xaFYW5CesT1SlW!@GuE5 zNilLU@-gu+@-Xr-@-T8RaxoPtGB7Z_<O2DCIXKMs7F$kcUTPvZJ7=dB<)!9mG8HL< 
zWSRV&l0lgmWR@s2Lpy^^w_*fk=o%(a9hk!0%UsKv!ji(;!cf9k!vxK`Y$<GLBJ36G zY8bMZz?pMVEk_D_Eoa4y8rCf48U}DSWo%}0VTf&sVXEb-<%WpW^3?Fua7i-M@YHZi zGB7dJ^49XzFoH!Gp(6aX0yQi(%#sYXf;EC!EH%6}{8_Bc49!f83^jsTY&Cq%jI}~F zY&D#c3|Z_oLd}ef3?&>j>>z$KV+yEfxKtwm;@dFPaHcStGt_bxhJ`Z}+JrNtGt>%K zSd=i9aMZ9tilkVLTFw$qh-uA?wIVgbS?ncTAl;G-AX{rhnwda06n+V3DB;fHsS$2w ztQD>i&f;xms1eTMYi39T6*?S=8H@}J;G7E0!{A~hHx*R+<fVW)#U+V(DTzfX3Mr{& znTeqC1XK{f%3=j@ajj6Ek&05{C?po8Dio*Yq$Y!^mZVBVd6l1rTrz=-O36%1OD#&x zD^UPf9bngh2}0!*Sb03OkV`D8RIpM2alm>r^GZ_FQ;Xtr5|dJMiWO{;iml`_1uKQj zJV=3v$i@m*3dJQwV9f|YOw-|o9-ILUIV`1LNO`_OT26jqNio=1aFG?F$$X0=FTS`S zC$psZ7Ee)PUP^v$d~r!)N$M^3;*8R?w479wvJq5q1%uMM0IY1}sAWuNfRuyaEC?nv znTq%s7#NB`s=xve0+cO_L>L$tTtP-aizOv^HU-BXwsMIUPH;mw0bCDg@)xm#oD3== ziey0foE01_x0us1OF$)HS!&KLc4(1xi!HIBAT=)qTvCDyjv@|_W>6thBo5Nd0!p^G zn2SqNG)1EX&=bckKGY;}iz&J678fF>Z;2w<NU<KpT%4U2#Se8vW_}*HB#+_**8!kH zu1Epo3Q(xu;w~vl%*>0=%u7kFxWxrl0bw&n++qdG-C_ZW-eRwehj7^{<G}*V$vJ7a z*ebyV@GVwwq(t#Uxbd*Kg2X4N$c2=(d@u<}bI1e~xH_N$Qh<+9h*5}%kCBB@iircv z=3o?H<YMAu6kz0F6kuXu6k_CIlwsszEV5@{V90#I%)syxl+0d&(g!$|zGPuwU`Pg4 zN+1(K#Wbk81*s`!0(CnWQ$Ubu4s$JE4PO=mxXlDAbXl6g?Ilprlfs(9n8{enU&Eip zkiwS20xp!1<iX+~Q`qN#i>egXbcRgET7epYEQS;gEb0Za7}6Qi8B$>CIZ@SgGL$gY zFoRlSHEcEPH5@gZHC#2^H9R%E&D=4pwSu)mwZgR`wW2j5HGC;tHVieQHGC=DHlV_p zxrQ%=)rO(Msf4ix?7~>HTCp188Zk+5OR<9?g{Oq2My#1poWX_xA)W=&ha^+@Ae^Cu zHH!`0isVh<YvJf%0JR4}El6;uh^baQg|$X7ld)E!Mk0#=tWFTz+JxE*D%=?vKyoQU zHR73!pm414sF8q}5^GZ{S;GWs6xB+VaMVcENYqG5GSmpA2-h&Ai1f17O4mqdaW*s5 z@YhJ%Fw{tAaWym42!PlV7z@il6+s6>jZh6k4XAQtjbW;lsg<pfso_r%wPC1{tq}mx zB|KTY;HpXv%;Ilmtd$3|1ezIZ6>8*a<ZBetn1dNK#S#xNp;aBm3c;yGnW@DJ8flqD z#U%>mnI#ztDVe#cdBvbsD5$K@0W~#B6p9NHlT&pd<-I~;UW!6NYEg1(UP)qlDo7P> zJqo3H#igk!3YmEj%{q`ODm6JjF9l*BTEzr19MoPe$;d2%ssM43Dz(fq9F>|T$SGJH ztfK%f%~KPLaw-+lN<n^8C_pr}6*Q2VR1jApOKXDt3o;g~URb8aq7iH>m>^U!QdYx( zt$;TubrkaA!DT6k0WKm_i**#D;?ok7OY)0!6oMe)rNyBBM0`PN5u|0T1L>H=7pE4$ zm4b@KwEUvn#FEUi)cBOlvdk16g%lkH7aavye8eLf8CD7f@O}y?Cg81bEZt17lR*_S 
z2!lDeD`i+J1uKKpL(EmMbOld&AR9rElm|_$RdRWyxk;%-Ao0vRxGxmaQWHx`i&BfL zc#u`2)SjTu4Y&fA0u{EP&W%tlV+unGV+jMOA1Tf-8PcU;0z2jw3n;Wf{p8}3B0QD) zFD@HUV+Y*jC<c}Npt75dp-LZId&KAGr6R%!)*DBv=;3;)=I|m=gItrb$OY6XV8Y!P z0X0O*5DwO4s^US;N>$vTfQ04^!r_?Ckivi#j*O(Zqzd5@!j|ETG*WD=Mc7DrvIAR; z;&4#K4QliigBpF{t~3v<D~+qk2x`HAMr1%~0+QDuZ4Hny;I?BIl9P!{U!bNDIBkNP z6QE(4#EQ&f6qkbX1;~nGP>(qilrQ8!E=9zrBnfe=$yfwxULrXhl+JpQ9FCI8ZgIfs z3D6J%D9dE#r5717FfgE`KTW<{Y^4P$i6yB;;6^GakKJMgk2iqRc@e1fd5asVzz4Tu zi$Gp0(g!sR4L}5_m351$AmtVtq=M8`1Y1>P43Y#j+=^sD4On()Nek-ELrdCQ9I%r1 z7GuyYk-Yfi9Q2a*mH=2SQb~JD7)wcei!tREqsuKJ%#!vN7rfLfz9k4w1fT#(Esh5@ zsEcp$BMTLmq}*bRy2V&|iyuBz0vSrUB?+mU;}NETssm89ev22yLEt{hEl7OCLn_c) zJYY*eqlWP*X^=Lx4JdT%Kmo%GatOR3e2b^JBn8fk;!R0Ivho&xN*bg=40Aq69_pXs zk`!<Pfh87D>boTkmc(9QCW6urXt13@kcUx%iGxvsQ2<H`F{v?ffJQNx`Isaa<(L$h zI2dIZ`Iz_^IT*zlc^HM5L_qRPT#Q_xk&bjwdm7Y11GT4B85kINVPldu%nS@Q3|XLI zd!}ASMo?#;Kb@ggAcYy+9tI62FhU14m}*!N?duxBEJkp9m<`%~hNu^+5z1mrVFwR+ zpsE)_Q_q2_rjua;V}(}@gA2n1##o11;S%N=;Tl%-wsZ|2xHT<OD_Sd7D_$#6!=J@c zBMxe()(B*=)JT9>6&fW_n`32aC2Pb$t<)4wNzk|}R|#v4B%~c(cq5#lge{95+zJLw zAb?uS;1SbiM$m9}FH@~l3UiG}CL_3A%LrD-pTgS00W%%iT9(dYOcAJ&g0_}JYNVm# zt{$~A&;c3Q63!ag8tEDtNroEH6u}y%6ro<$TDclQkVqCwjU1?@403sm5L|>CG>*&2 zFoCf!B%Gmyrvubq0QWDNK`m$bT7??H6k$n*ES4I1h)yU^0mLip3uh?d&El&O1h>qY zY8AmeAt+C≫B5nSqg^!mdUT>RO{(nG*gi!5RjLt5QTu*s_Em?rCNeVJHy>4QNR- z)X1cW^|CNBOkgTx0uKx+S2&f3)PUOj0?o`W46#Nr%(cR`Dzy?d%2_NWte}CP6mfBe z64n|OP`E?c5+F9nr4yJyBhV%MS)w%zHOeUxpotfd`6c{WVky!f6(#&x;wdsU%;F4C zo<xdHihK%Zib5|lBSQ&)mSl=z2`eaNiZi4rfov*~s^Lgc1`R-oGo+}1L~7VU_NAzT z*=%68nm7ZJnbI|kDe7QZR<NuFL^efJoFPRkMZ1>?Y?BNq%u4vPWI=R|VvQ1b5KA}l zAv0P}#7My<u_VzAG|Y|K8-bPj&|xcB@1Pi|12I&)2axU$yxk5Sr%*^NRsat{D5T{V zfxAkXc`2Eo@!k|j`y4!g4Q>fTrh!274k-%7iDjT(Tw)2DmV(5R3<VGopI($$kfEuN zr~qat6r~mvr52~=fd>1~f>;CG3rx<>OUq12%}Y)Njl886l_lnADwGu~pgI@q5HLZg zhl8pQIza)}3F-l$VDPj_YEEh{XoNAXC_fj?QiYV%w9LHJVueJ7ywq|9P!&|13hlhY zv_Xc!A)|N*|09hUqSy#hp-_?!4n{}-WhO)PQl(=C>K_zCdR>Xd#e=yw296NiJu#GA 
z3Qi_y(F#kwU~}9+6MjXhWvNBQsibyVDD5Te1GNJ{jXySqDm_Sh0jc!^cQv;D)d2Tz z4k28Q(fB65Qwr{85ZSjoim(yfp#|$jv778J3VIVC)Wx}ou#U*6LGPLrxic^@pg0iJ zd<V4+Y1lKlg5+#SOBmYaE%Jc$pFKgXU(5~(<1JQjfu0`If9R`5%zt}L##^l5Nse2L zT({VAQ&Tcab8oSMTHUEdnoPGi((?04iZiQHZ}EXsY+_DkdLF1KtjhF*4`{%Ow*WFn z;kXb+VI3P0c7)E{L#tg-W%PVWfi2hd#UJdSgIdH43=iz9xFN1q$Vp8r(PX~Gmz0!W z0a`bZn3tT9Uv!H(CqMZXZ&H3yN@`JJMQU+DVoH?|VpIlPEGpP4RIw}QDg-ImDik?^ zx^pUAkdZ3Tzzt|*0XCq67?=d<5m(SvKp2$_no~~&%^FpiaG|*!RB53H9JqD`YXaA& z3Q>HJG6EW|@wXV`t6203^3rdymSm=9lxQ;E;s$#zGcP$KwOEs>r~nk(g`k+1yTwwR zUsO`$1rmXGI*ar`T+q-8xXW4O1LA_FOt5t|i+n+yCQf)0=oWK9PRT8{w9NF<qSRX) zsYONkMM;T8;8~L*Zjf5$s?360Y!xM$$=SuX*eW3DiLDY!=cJ~m=B0q#UXqh~i?t}f zG%w{AJE)P6mYIHwr8pzM{1$6+PJVH!rs^#|cxec3f!yK()rOGv0(z&{ALNMu5D^F> zf<OeQOezWiu|h#a7>Ec55fLDF%Hix{LM#(P>1Tq)ilRWeqd^3ylU)=8V#R`pI1mvJ zA`(CZXbPtY)Zx3u3Ga)dbwrDjK?+kqL@J0#1G$MC97@nxA9m<KHfA?d3X&winGl>* z<KY3d5L6a`oXH@@!^FbG0_|Z!dYhn$95qlMl9`W52CbJV#3;ce!pH}n?BQeLVB}*o zVq#&GVANp}0?oBE3NX4bf_te<MSY+$4&+5pZUc>4fY_iByJAowUBXZU?%UKbVC(VK z@Hg|vFx3jw3c}Sf;!!75D_kRx#aPQ!BLJFYuN9fVSiw^x0`6Bb)ryudXR(0V?<LGx ztTh5PqLK_*Y&Akq8p=;$1othNv)EIZQkc`3pd<@uP6^yQVb0=6VXI+)_DPttI8)ea z7~wpw6!sJj(8PNQa~3xen<s^{gcmf0F3ym`CC*U7Tf$evnZgb2sqlbApgk2{afTXE zafTE=aR!jNDg5FLNVf6UFoSq00#I22uue$71|$a3DF}BBxTjaboFxeIbzxdKLyAx^ zgQjrebJ+X|Sxs+nLMq5fg)}gWQJUc3q*e?n@eqZQ5vaM1O~Oz?BR>yn3}_xkAvr&% zG&c{Vy9m5?3&a9zL<}ewBNDGBxN!z12sPRfjxj`PIwM4lkVN5`1DuIR8~Bh)Y_LtY zfTJCE1K%${FBPmrlkpZ~GH3`4RQfQa&zcDWAMLA{baktk^vtWUH4s4M5(9Yjn21^) zXZ<aVUVrDODO9n7E9@#3P?jhv1+@~uwX>h55V)Ky$^;ee;Mx^5Jy28)D(2aXlM_oy zQj3bDK|Gd%ocxladXTyXFux)vGxrutWlm;pQ4>fMTva!NSS=u;6-2aw>UXhQ+=vu! zh|Do6Y6q$601=>uUlDj@nB+<_N(3<inw*nn2pPqV5<(L#E=fUCX@pg!Q9NkW6dV|! 
zc>-|71|dK#<XgJn0f2bWP=8`kVrFqZXoxzoxES0Jf(^vNGxHBnO$RcDK~#fLgNcoi z13X@;#3;qc2VND%!|1`N$H>801X`(?jCH)UfRyo4HWJ56k=5fGFNLbXGF}QBEfq$r zJs^6#6l^NZ#!JDbkB*n7Gt?@lGt{b7B$P0vaDv87n;BEY!Q-YrwURX|Su8aSpwVj3 zNGD|cv;;IRDk;g3A^~b8q)39tDET33#2KJ`0T>^oZvs<cP&jB@RIEm&Mma?av@jjq zW&w?(%7Rps@MlS+$RUk>N~Y+hD5h|xD1lo&{8>^d$|bDOb_!(t6f!Cb89(I!kC3W? zT>=?bRfmk0Ld^z^rD}j>AuSS3FdJe5gbgu46QV0cOPnD^J4FZY7^`05p3yPZVok^x zYY?PO0vnwIS0<oQ<3TpI3UMa5{sj|+nr9I8SesgyW2jKYM2wv#7ApiPK$grPx`Ie8 zJ$eqELObN3!BcP_48#D}#;BvHpgIhFnsx9lXu%vtg+>l6fr4Y-O_Q-G1Ju+ZaiI#O zL#K9(3=GB4p;I-CZbZ-^95Qu6xENzp5HvhdOw>9a@USP5L#D0>8zHS;P{$m_ZmJHM z1|Y1f!XG7rVyHA2$+;vAm4e&xNCTxsS)ishXjHJs5yZ*{5jh|t7t}ImD#`<~^Ff&; zGKgzX4TcsKft&_f=T%e!av!#FPt-mTw5S6OZc;b|s)F8yDg%}1pfOx<(FtzpD@1WX z@^(->QirMtw9ZSDv8WspywgE`m;qwT6;*)Pl#X#0Rf0@`_tA=KKwQvTaPX*R5oq~x z5x5Ul2a<sI)`}WITyQrHt#hV&iw#nC79o0JsEc%p(E4Mb0s-fEWl<-{BGBq7q)|xl zFel+r$f6{WUQl1D2)+;o+xTNq52*ji9t7Fx0PdNgEN>Bl#0z-12wdmI2PuLw6lnAl z?|35<vEz*_jAD#@jCSxbM>|G4@XD8>*`P5;SRc+1dCdzK2lh2D=?t}k=?t|((6uk1 z4J(MXFUWluVYIa@khL!&H6loBUy#*{U{x<#BbvpS0$T@ztX>pNJtuUn3{(xc?>7ay zR;H^~tb`dl9x8^s?i||p6R(x1m8_Mjl?HDMk^;5TYlN~`YQU>2Dzr+NKrMY2hFG~; znHovZ`jZqc@LCz}64n|SP+Q*yv<gK4X|)Vb3NLtVIc&8IUkZOOQ>`p`Esks!X9^!^ zKO!Roctwslf}g?+T2o#tS0k4NSxqAVT}=b^71(8RS&S)yHL}pvG+8xr(Df;CwemHP z?M(_LTs7d8;F1hA5-CD8Oew;>thI_Y!Zq^X^)QNA(6uu)A`p=jz8bKY1b78WjVMI4 zMlp*AzQ$%9XuV7aLyZJv92&gFMyXc0MmR+TY^D;}Z6Xj}3SW&9%q)mVjWS5S@EACZ z_)6Ha_-pu^8NswbGh;JTtxAbNmSBx=jY^7WFH@~5m?r||seyT-P@Z~<SPE|oLkUxf zP>OgnV~J3TL^C6392m4IteLS^qedfzt3()Nek+qCL#;-NWUXdJTZu@GMvY1he~kcm z1&>fOy9+~XUJQG!R;_lePOWaOSgl^Ibd6>WsJA5r>XDU*)@Y|lgL;+X3@I`oks2LP zY^TVA*}7o19GDH>d5|IxX6u2)6~Jt1Fk4Zap;mJOQ{kg<h7_e*{SvV(@f7zI70_C; z3XK|JX#C04%9n^`Nz|x7;weS7Mm|Li)GwXDT$mTmP$HHkS))>;k)ocWk)jC_FA>X< zDiO<)Ze{?bCr~f3L@Y}N!oy&3Nis+<q-aYsNHElBHiOm$iDk*w=!0UVL@Y}#MF-y7 z70Z%O@kr52;Y!g5_jbjy6jBV}y<J1FzaV{8BM`epw1xv7O2!}&=m@<Dr1uK7L9vE0 z#S|<H>GPU_*${KgASzSL#Tim8QY^v!T(K-A&}e*#Se7!B7EkfS6K3F)j1*=pg^Oq& 
zW>zR+=0$}tvxbBOQkdC*!-@Pb^M;0*MF#^ko$1sXz|vWYpd>?$YK?&;gET{mYKm=* zfjC2ooj3zD-LV#4qj?zGqlBRk&M<TUg#$`jbHoTMCy*LYSuM_xLTsAy#Tz~%DB<Ib z7Cvl+-)J5_E-2ySM?(0xVuX(yY2j0&TBBa0mIm5O<exZ$1#JbNF>x#SF!%p}24Y}q zoE1`1aqjK`Z^Hp8M%x(#o*h>xN-RkoZ2Ny8gHOe!xu6L-a8nLx4Fh;LO=^llQYCzE z&R`j(Dl1kX+0l6^Sl1RJB1b{PP(fD#b*&+IMInh+fyYL`1fk(E*g||rutJVR0Bb}U zz*109z#J??)k@r$8BEVm7&#k)d%)m}V~Qc!3Krau*hLvR3mKr1vzef78g%4L7o!&k zbMPP>K%0wjILaUvXjvVozl<_~M(OzCLQ*WF$_B0_2pj1!TDKlyT@_KuU{DO&ZAEf5 z>4SDC$p$p`2~IYk<(}!FePTSdjF3HlNITj<J0_De@a>ocZ)H1;aMvxy#9NG2w-~B8 z6|5A(^rPTQ2HA8J!a(C@Y&r@c!75(Fepiq%tB!)If^C%$Xz~DgxudN@5okoQ>L0`` z!W9vY<%r;w&{kX^Ctx>&3*;&sn!)2d1v#0hO#v=d1zXVC9?$~IL{j%ybD=D%Q?Nx^ zD2eQS*iP6YCC~^T<1O~I%yiJg(<&xCT}?*F*elk-R_Jn&mG<CKS*&Y8;EP4k)`(Ix zEXv9SS|wVAI&2Ca(u6GijKDJP2|AN1A6IimUjav?ppTXELrQa45squfw2I#qv{Vz^ z7zIx<R)ugOiIf$CWHPDX9B{KL9`2kfIphtw&^5bA4a_Pwh**_p5NPW>S}P3fQWOL9 zv8zbV0B>>!tA{Vzt?K8ZilJOo3ms*&$b#+A2k(@H>|&3vk_E*)bOkpga1@XO%mb&N zko6H0PSD`V!xp3@m_@zdkwXif#GD-DkkMoUmj_@%lc@-_)D^s@i)!L3q)jDwgGSXs z4H5M1xonX5f-H&#EuaQZF5F_yFM=L91KPI@nN5ICI7D$2r{-qn<fm5@fo2#$r@K^? z6iotIF&RV<G2>u(ix-~nKovJJ6A$1S1W=8LbK>C^b4q+!6ju~zV=TBui#8Jxg|f{T zoc)WyOQgXIu_>LLh~iF(FN5Zcs3Oox=_sUKxe(hxOQ*q87g5ml)8J#xz_mmab6I>; z6l+RmZhYA-Fcnn<+Q=QnUICrt08Ni#&U8pZQX;qp1b2_&C0Yfj(mw!Ndk9@%jlRel zZK*XA9}{RpuK;+nFJe=#5Th7u!8M~0lLQlJ$r>NC0;2#U3!?^O0uu`p=x8$;Mj=K! 
z`0{Hb@Kgv6aRHFH4w8Bu=yGjb;zcWvr&K_v{eb4Uz{m1%urh$R5VtUZrX`v|2k(G3 z6*H%>fChq7SW}q52k<bYSc4`9Ku6Nl3Y0KqF{iM@PAqF?Na2uVs1>SEDq*b=$YN_| zs1X7UddCXHFoBNFsS&9Ws1ded0G&!!Bap?O!kNOA!VTGcJb}3)2xRXB=2(wf(Hcg` zkw74u(-~^T(iv*SQ+S{*5QQws10R9I4&J~FH6w)=O@(BQWEOi0UkV@0mK1(e72s(Y zxf(|B(sB{-p*U<cqTs`Sq%fA?NrR5Nsg<jhuMx}QtdRo^g4T#<aYDB5R%n$l)_`3W zD_5&fBU_^&2_E+DU`P=FO?Q9>+QHK>F5%$w)WDmO1yh8;(=cqHljNEiQ`l03dzoq# z!Ez!gOf4KRGn*M}m1>l-*i%Gn6rmF<PBluP5O!gRHLC?}vu2TGs8ufEtx>K~su7k1 zZ}Y8TP7&{AtyQU!0EuLA)~JBaZ3DS`0%PG8&=Ru_h8h{jkw@SO6V+O^8i^DMNro)W z8dZ?G!e8MG6&5uTP@8mWg&}68*hn(es+VwQ36yYW2|~o08Ee#=nQA0KF+72(P%fOI zL?}zRMzV%6MY5NvR-=R|O9V7jUZDp!PpMY8gegn3h7qJok|9M3ls+UGYJ^jyds)F- zl=Hw_lr_^CYPBj7N*Gi4O2j}%o}|b?m)t4TXk~HMFoHrG<lY+1EY1?H5^;z~iY#by zBn31D!c`*%Q6tU(<%`4kAbk^<3PBS{Oj!~&S~Z%Wb6Qgr!BZGaS&}K>siP7m(4x8; z7U-gUrYz|c+Z44Fz7%!vBm`5IOo~Pc7bpdYGk`W7mvBJ`dbPwEO1Nq`K=GfV4Q4}@ z-06rjAekmx!<3>6mK6o7)C032!|D(=!~{Kvt`vQ7h7^MoL-14tQ<fYkTuYd;<Uw?e zMh$<Bs0~A5Q#b=G%uATE6jF>*jC&btwJS<M`7Ac4R<t4oq!P4!0+jw@Lu!R<L~9sQ z>?9eonI^Cl8PsTl<Axu!wE~oaYD7`vCr=KSiWC!2=uKcQ{1eVFfw}NiI75wgmST!2 zD6DI=vy@VlVScaC&QeY>1BsV_!qgruOwHlpXaNopNGKqOqa`FnpeBIA(Mp^l1rlcD zg&%(~gQi2G9xK{_aw7O_z|8zS&_RLVE*A8(4RHMk+9;i!TB(qnpI4HYnU|TDu8;^n z^b~2VNCVGKXGjAAJOqn6E(vbo2H6@a*w`r)rKXhTr6lGhS1RO{=H!6Kn8B45Wv5Dk zN3)UJe<%l1!J4e#_I9xXk{lT;8bSR?h3wQy@Yp9=TUTJa#tTx56u{L!Xh0h!HYf^Z z(DD>;q=H=tCJ2q~qF9c5Xcu#27eym+1G`Wyka1neY7-i*xkMkBhFCe67GXkr+lcWM z(3lG55jTT(?d4!CRlo%tG;zTOv%wh`YkmXg5$t1bpz|g(L1S)Qpfk&A7@=pCF+xT@ zL9(zj%D_vNKmy>FIrxmqBG4I3`b-S)1)$jW&Vx(8c&rOSVQW@VoCfN=fjU1ZqsUYl z*fmDD6x>Ix;s<Rr)+;E=FNjaBOa|?JP0r8BFS^B6P+C-wlX{CSH?b%`KW~7y_&Xt- zO{aizMc4>V2XwO51Ib!Y7ZAl-&?qrzM-s{Bo?$ywAQ)j2)<Xp_(=4d(4%!4|r^z@V z=Lv)&xtR3vcJLT2Q_)J$pv9nABx{0ozzpjE8E6pK5ZuIqj}qfLQh=i2VhUH$hGQSs z#O)<64UD!TsAU88DY)K*F1&><@C<?uxCF%;;u=rB#bgY+M}e!LC_gJT8FZ4jCL<)y zK_g04{D@N=!0Ee61aTq)(rRKg1ziQmZd6dS4b<kuexyPMsUB9q?qMCI`3A7NFhW2U zGVp^O0-#YyXhW+AbYMJ~&}1rF14`j*K?Fz?SwIsykO)3<0W`vQi=!wR+-@ws#g<eV 
z4?6E2JOl{d3JG4h44Rw8dN2d1ih_(Uf=>P`S_HBWyet_$t_a>!S~L+P0UE)9j4Oi1 z5D|MyajsIn#R969ZgCZ-mc&DkY5=n#$2CNO*;U{}2{d(X@jycX+#oKR39=C9(q_a_ z450cI=ip+L2>5&in888u;7O4vZm=l0f-H^#r7>8g$p#UFoTbnYashY^HR{^vC{eI> za7-3MOpFhTH!K3BOd`%~fUK6rvQRpT6Ko{NtSDi`7Fe*8L4x2h$f8-GHPlRr72u`| z%JOM!$2NddVSJDTsK^B^?q(3@W8z^FW8`AwVU%FxVH98j@j+vZLW}}TV9dnF2tLFC zI<hDRKPV7%fCK1=2OdTRCdd&EN=z(_xRzA&FmW*QFmW*oG4e4gF=;VzG4e5TF$ypm zF`9wL8H;uxj}M|8Gw8$&x>=(Za<m{L{AfW2(9sIukwMg>1tCXmvp`1oQXuL<M+-8f zu%<9$s274B%?(q}hN=cUP8S0jx0t{f8w46Ps$r{P1&`B#4(S$#9McV5E{%M&AT#`E zL1y^Tf{dV}=3GEGgusp#WCt&o<|tu-A1!zW=^$>-6fW>69Xn{0q?r+PR}uJVK}Pt| zf{=r_c~h9dBX-E)Af3gK!UsQEFs?=#dJuO|txO3=3V)3Z=#UBMvS`#p1_f%EQUs9> z8Dz#dWDq0*T_`<)vCsqOA%jAa3|Y*OQOX*j6bKJ|$Y2RajUZ^;1Uym%J8Td<0tr8C z5X3`0Y!Dm@vFL{lij=V8IBZZ9JQBoI$N?V7!*<vp8}zV2<`Ndrm|2RLI70~w^sqq) z8~L!o0MNK)7HCtga*8<UxIvKl;KK&N3**6u4NAioC4x4}YNyDhu&2m_$LT<a4Jwqd zfKq@sLy98ErV^<d&J-okNTE1GiZVz9I$8(XILiWEEUb#OSdl*qv_TfMeHLbpIz%=_ z12p!Rq6Hq6<Ilo=*q~10uF;LKu#K?bW(d;3v*1bwG?WHDq6%D3gH~7$vVE~wT@CI6 zg9&h*kLPwItlF`T;Gm7aU>iMxwyPikhjsi2>?W*ZHdHxG5b6NXxC^*i12MopMBNdK zbA=zu+7L(+l-g&ygF^uKAtRXEU*X41K!O!E{sA_piq6|#SA!Y}poSvWlLNsa4az`+ za24Nrgv(Jn+MvcQsC`KN@s~{q8^Oa0;4QQ$c2l*JzZYR0mQFslsDXF#!NWrX)YAuD zy8+u!joQ-(kH8T)J`nq!SkSt)BCLzEsyf`j7d<Fwzyn0V7TW*Q!nRLdbLefHrRsr! 
zW>{8@r9px)KMiu|E-1r;D@SmULc8$-kbVkS2&4d4Ctj1OXeX!-wF|V$PL9}P0&lSv z6lLa>;5;o5yx64()a`}zOwo4If;+xAyQknSwir8UA@?bPy#nz+xHQJPz6IPL1fR}_ z=z@YfexP!u2+Ki$Xv<k3YgfRh1`@G$r5hA{;JkvRzX)EgQUqU}QUqFfLQJm^9&Zyt zr3%Ou45FYOAY^3;gaq~TV7)-(qx`tQN5;br^8@uA`4}O6L7d0=?Zw&8GZ^T89`X_L zOep<4?i%45o*K~gLZAzo25&zPbc8&lpM-pbJO|Pd^87VSDFSG{J5I=ER>+a<S)kq> z7eoZwyW_@sguEb1?+(HP_wK;GI?!cIpd;jkK<i4t*D!&OkO%RUO8Bz)LF)y;N6155 z3+mPJWkEKe%1AP#2$!&C2}0ZhK0;myeuTUTsMj@t5wyV!bR<%RQ;Bd5_9Ns$y*lA$ z#uQO-&kAycJcNzhtFsGdDB;Tj9Ye1Sxt$hdehFWeXo@7Lhg!myC6*!u@73{TiKl3# z$fmHT$boxxd|47H^6*|AXlpA=iDV6DiXyaE2Rc-q1=_2F^za~SFjOG@HmD6!HH_c` z=3%`$=mGPfW97kHT0uw1^MSUrGJ@O+?%lzUkk?N9Jld;+_3E%R>A^+!pzqk>ayYo* z4<-op?QoeuO!p4l+r!q!Ls}#c=^#Nu6Kih|vKa|>?0h2VfP4(?V4n=$9wEFYgx~u` z-7CJp-XE&nK}Kf8WVjWi#fP+B6-)9<O&Nq;Lso<@P`YQd>l$((Y@|omP#9rd73tA5 zD7uMaNM1ngCQ`eHI2qPML`l91wn)2^^V3kb#^^(PkfWVNET_xIW0d$9okbBy(SZ`+ zcsh)r>nn=(gF1||#C90z*;AB2^#^zzCAf|xqq7L^1frfAkF~Q1?o@)hX+?O?f-j;% zXA$#=H7x#3O#vMTUK|11q(F4X5p>fStlJ3cToULs9t2$=9SR0d?bE?@C~*u;m0Rrb z@hSPq@$t8Wkt!_cWjVz~pb^d@&_0A)+{M{Bsfk5-dc|eAMWFGzTf#7ryhPBwpGk?8 zsl`R0+uWnXVKTY-DXBT}ko&?j^Yb8y4|F$l6hB-^YDrOMa&Zyp(r3_(dpV%H#X;A~ z7I88#Fhq&u7Uaa2<mcyPXO<M}<${i9Hclx5UoFp)n^=+oJ`wB|YhGz?K_&PKg<FET zi6tOqIhjd%1(hJyEj9=Z9tXL_Rsg#9ycm3$0_47F@MJ#d6l=&S(V!!W!Dn27cV&R5 zUW-8e@*+?;gU1Yz2+-1`TO2mI`6;D2sdgZ*6@#{KurLWQ@_?p{!6b;q!N|qP!OS5i Wr)%P16mFnt6mC?{plDEO6b1n70%G(4 literal 0 HcmV?d00001 diff --git a/generator/use_generator_baptiste.py b/generator/use_generator_baptiste.py new file mode 100644 index 0000000..437d14e --- /dev/null +++ b/generator/use_generator_baptiste.py @@ -0,0 +1,41 @@ +import os +import numpy as np + +import parameters +from multiviews_datasets import generator_multiviews_dataset, results_to_csv +from tests.test_classifier import score_multiviews_n_samples, graph_comparaison_classifier_scores_n_samples, score_multiviews_R, score_multiviews_Z_factor, score_multiviews_n_views_R, score_multiviews_class_sep, score_one_multiview_dataset, 
score_multiviews_n_informative_divided + +import warnings +warnings.simplefilter(action='ignore', category=FutureWarning) + + +n_samples = 100 +n_views = 3 +n_classes = 2 +Z_factor = 1 +R = 0 +n_clusters_per_class = 1 +class_sep_factor = 100 +n_informative_divid = 1 +standard_deviation = 2 +d = 4 +D = 10 + +path = "/home/baptiste/Documents/Datasets/Generated/try_outlier/" +if not os.path.exists(path): + os.mkdir(path) + +Z, y, results, unsued_dimensions_percent, n_informative = generator_multiviews_dataset(n_samples, n_views, n_classes, + Z_factor, R, + n_clusters_per_class, + class_sep_factor, + n_informative_divid, d, D, + standard_deviation) +print(y[:10]) +print(unsued_dimensions_percent) +print(n_informative) +print(Z.shape) +y[:10] = np.invert(y[:10].astype(bool)).astype(int) +print(y[:10]) +results_to_csv(path, Z, y, results) + diff --git a/late/__pycache__/multiviews_datasets_generator.cpython-36.pyc b/late/__pycache__/multiviews_datasets_generator.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4cc36e52166be6df44fd7ed479f9ac3fbc60f93c GIT binary patch literal 7364 zcmXr!<>lg-s~VT2$iVQJ0SRz3FfceUFfbG+F)%QsFhnt=Fs3l&Fy=BvF)@PJ%sI@t zEKw|AHcJj`E?X2^E_)O^n9rKS5yhFpn8KRF6~&#xmcrh`5XF<ik;2)+5XGA!nj(-Q z*uogamm-uR+`<sWpTZi<pea^m$>m&>npl#WqL81b5K@||;Fn*fU}UCXXli9_VP$Ng zU}Ruu$;IW6SXz>iUu30_lv<RRSd@}l!u1m5ZcWBp!nujrsqx7<iN(d4X_?81C7Jno znvAzt(-TXJi#3^Uu_UKv=G<aU%gN6#@=Im{>0)4D;9_84U}a!na0dBy3IhW}2}2D- zGh;1N31bao2@{Ad0^&2*FxN0;GZd-RFx4{GFxRrAFxIkGB$Y6yFqN=?R5dfXFvJGb zvehuuu%$3bFi0}gvV+)?47D63tTl`^>|k>^YglVI%^7N0YglSHB^YWrvKfk6YM5&{ zAbbf1a|V$4HLMa0wJe1;;S4p*!3>(riBgOV3|tBd3JO7~C8b4q#R?@EsS3H7d6~J1 zISMJ6xv6=@nfZAN`DqF{i6yCdB?`p_iOH!78mW2trRf<8CHV?TrI|S?U~MUhC5gqU zC7KFp`9%upsd=eIi6!|(@wufrC7ETJspZA-P!$Tv`MCv&MX4zu{ZJX0RmJf+nZ+ew zCxMB8#G=I9)RNSqVk<CT7XiS0tlAZ<6yT-=DOf3_<>V(q%mjHAtPkp6R71fsiFpc{ zc@Wh_j0_A6FF}MR+b!nY#EK#o1_p*(%*Ca-w^)l3^U_mqG3TV_6@g+}lPgLTt7C34 z21N<wCRW6QWaHss8pVlV3KpfNlqRR9AjxtSrRL_BrN$SR=0<VkmF9wVM{$D9C@4xz 
zj^Y6`lS_+=Qu9h~u|brtWGLcgU|{$arJs?Xo2s9bSWuE#T#~BqlAm0fo0?ZrtnZ## zQl4Lwtq+ftcz8_dgQ6HA0gtX?gc7}i%3ER}WqBp>;FN%Hu>dHg%Yl+VF9)Ll69*#; z6AL2?lL#XZBL@@<F!C@J@iQ<mz_SS`yD-DD3CM&RP%*&R%2dOc#w5uQ%%I7X=#MRl z78K=Yr6z;&A|h=<Tm;JKphTRUpHrHfR}7L)&d4uL&5MVMfD@R4MrNJ@Hu3zTl++?k za6$kRgwh{!U?^BABo-AVR)RHRGaM;d5|<Fc#(}&5Pb&%<d8N5YsYM{cqWp5O|B&6F zqmYxDmtK+qlE>l%h|m1MsYH|M77w!4C}Aubz$zgGA1Li}Awu2=6yu;6WZ*3l1Z9*L z%nS@KLDBgVB>M6f0|P@58%Tl&6z8DAACx?#85kHqi4>%;xP^&<p@uPqF`J=CpoAfX zshP2fv4#oES14gfVFvS=Q&_SYiVR8^Qdq%!mK3&ZhGK^rmK2t3mLiW5h7@+NI4f9P zL<vI*2bj+WQdg8w!jQrV=Cgz4D@qtrxWIf4Fu$XOA%z>lui+GDNMV#_Na3mB5@C>F z0L!l^VMyTx$(Jx@F=c^@RH&*Nt{TP^zFy{9?h=+7?q<eX?sSH91}271##)|=q#AA) zhS;cD-Wpy|VO;#KhPQ?@o2BSh2V)j%4R13eBSVTzEnkIB4IfB;0%NgI4IfCw1jZtb z61Ex+m|JSNnwe_(CNLEK3uma|$zrYH1L-Zi70%Gkn8ujGpCZu00kW%^v4%&Sp_V&E zFh!t+0U}b%UBjKl4yqD+85tRBm`gZnxNBINnM4?B`Aax!_(3L>aMkeFuuFp~BY_g` z8ipGF8i5qH6t)_s8rBq%6ww;i8a5Gz6p<9MUhWvCTESYO62>f^6mgI(CA>9)S$ruh zk_^p^E)30#&5X6e=?t9==?t|Z6=fy7HH;wf*sNMnaM<zJ2-k4eh)Ob~NTf*iGS`ZA zFqE*=h&3~sGjuYP2-FDHh&40Sir0u_vDS#!h@~-s-3JPF{u;3o!5S8bZcv2>H9?|; zF-xdMxJIIxsaALbW8s5vhIWQD#uO=Vs0f4HR4ZA+RU%v?+00nOFU^o5Ey9q(*vwQb zRU=p<R3lO&RU?oFYAVPkF5^V23KJ2Pm;$`GELMP4V&F^;CI}S~dGW=Gxdl0?#R^vN zngx{oVHj$l0wQC>)MLr(P{ojZoCmHIpyn#r*(w-;9RxF20p?aDqd~PsF`BCUyi^5{ zxI$*JLZU)(Y6+<DD9+DKRY*%sEGaEYEmqKg7M_|QDNykWDL5d_0kCm-kTw#`m)Kl` z%{}0TRcf&U*o>(7w8Z3+{32MD4Kl<KEQ9b8_SOlylL)j>GK&?G6LXSFb3kp7jMO|x z{1)V7rlu$)RVu*U0k#{|f-q39v4u1vKn#SNf)s2O3>9qb6!P*F3i6AKGm|oNGD|8! 
zp#-Ya^HLJ?k}DOGQcKEHQ}e)qj0k*?UIUQc+{6lSdy5nUK*3#F3~Cp}7o-+J!XA>o z5P=2`+xX(t0xZF50QMSEu)>&dO$xRO*eub4D}b5`HYhJXGcPT_C^xYrvn(|}C9^Cu z1#cw6Y*SEBz^1h*F|#<e7!)>$Op%wLr;B6>B5gtxgPnn7ED{r&4TxyNWv&iVuEA=r zE$#*~KF7eTZybge<(KBAXdo(l^tQJ?wh)7q2ROnUYkMA);Sp&O#X7JQsD)Ib32h@O z6eZ@R<mcv8Dx?+V=PLMVXrySnXzCm3C={0@=A|SSrNpPCmSuuFM{!1)3gww4844+& ztb)a08(RezZ~(a=Cy^Ae5GdAQ*#T24TnVNW9h{5lRhawX?gBX<1;g|xz=H==G=NGg zaQ`tgF;5{Sv$&)vGpQ8Zm?adlxNRDcAOZOm>?GXXsVGpZI1kb=2KgQCP7QdysG|_1 z1+B_76>PzMNoc1eHAMkh7lLi6#ATKO!Z4T#nRz9t>8V8uIf+TBImO^UDY)cP$V~;c zpNlgx3qa0EO-#;!+5xsA5!`bG$v|DCP*Mu6yEP)>%XAbx<IA9_8`5(G1wcf6nS!lC zlmfJ6oSLGLn3JEEu8<O6rl5gVNPuF#Oc#_pz?n=_0a~vrWafd)20H;{rYG1;kjXFw zd8N6jMft^$tOY3*i!$@lK`xDgn*vn?Hl{SMxD>fdSq!Qul0kiVD}{p8A`mMvJr$%M zq7rThdV!OPTyde4r3zMr%1Aurm@aBL2C8o0MPpt*s1pvaP~c@fq?jubVPIgW;(<jF zsNGY=2db+Qi$S#ph!5^}R9V0)ayw9YUyxr~q>z)Ukd&XNkd~hZ?mp(Fg8Ib@DX9uM z>WMk&`9+x}8M&$8j(-(5xYiE>6=Q}~k{}B(YFv=HRifa8g<R3v*(w-RNuY!kv{D3X zU`qj4(nZ1`f7E&AD5NEpmM9dKrYb<oRRvuIV=Y+OfhbzRx)i|PNG!|GEK*2GEmkN? z%t<XRDh4$_GC{R(u>vUc@=9|+EzX?$qGE{WQgaJ(@-xBgL<JBl4^&;}q-rwXV$R7= zzQtOcoS2h(i={ZTDm582<N@ldf%<WvArFv4izOHs7}6PP7-9u#8EY6j7_!-mSZWw2 zG8M7}Lxvr|-9$~MTTFTew^%^wTa)n?H)LP}-023(7lFEwzqo8b12mv<AiF+TH<FE^ z$^e@0u?`#Q+2rIWC*~B}=^->@xHK4K8Yq2$UCL9-Si%76g&?_7lkpaFW?qRVQ;`&? zg9_@7LL3I_loo+pS0u~8z|ape2;_e@h7e80TkJ)tWvNBQseb9zBJ12AR<+n`a^K>F z7hzFcdGWBu@-1GJh>zmVi-)xZOF-(1QVYOCk|6Eil#^O~O9Wf$y2W0enwg$aQXIvW zmXlc!Um3*-Ru4*Pw|F2`a(r<~VhL)5fjZXUU;quMfP;Yp6k$j~0QN650B$j6g1Ha^ z?)^Ck@4IO-6-k2n{_I7m1v!bysJ4-uJix(ziw#n)X)@m8$W1LtjQ5N$Lvz7OgbQA# zfJPDi|NsAAQ{)y`SYl3Ts%ueEe$g$ayn<V7kbt<w21(GjSU@T37F%LLL26z~5hyTj zu@vVQm4HW{Zm|{=W#*OKVg<Ldia_yuixoV|Qp5@JB5QF*VnOOHw&aZb%;Z$?um*T^ z?-plqYDr3FTAE=>5ol~zQ@Th2q@EpG2o))TcpR|urU*3HQKSqK6M#n<w7q;w7$tc? 
import os
import numpy as np

from multiviews_datasets_generator import generator_multiviews_dataset, results_to_csv

# Script: generate one multiview dataset, flip the labels of a few random
# samples to create outliers, and dump everything to CSV files.

n_samples = 200  # Number of samples in the dataset
n_views = 4  # Number of views in the dataset
n_classes = 2  # Number of classes in the dataset
Z_factor = 1  # Z dim = latent_space_dim * z_factor
R = 0  # Percentage of non-redundant features in the view
n_clusters_per_class = 1  # Number of clusters for each class
class_sep_factor = 100  # Separation between the different classes
n_informative_divid = 1  # Divides the number of informative features in the latent space
standard_deviation = 2  # Std dev of the gaussian used to draw view dimensions
d = 4  # Minimal view dimension
D = 10  # Maximal view dimension
random_state = 42  # Seed for the outlier selection below
n_outliers = 10  # Number of samples whose label is flipped

path = "/home/baptiste/Documents/Datasets/Generated/outliers_dset/"
# makedirs(exist_ok=True) also creates missing parent directories and avoids
# the check-then-create race of os.path.exists + os.mkdir.
os.makedirs(path, exist_ok=True)

Z, y, results, unsued_dimensions_percent, n_informative = generator_multiviews_dataset(
    n_samples, n_views, n_classes,
    Z_factor, R,
    n_clusters_per_class,
    class_sep_factor,
    n_informative_divid, d, D,
    standard_deviation)
print(unsued_dimensions_percent)
print(n_informative)
print(Z.shape)
# replace=False guarantees n_outliers *distinct* samples are flipped; with the
# default replace=True, duplicate indices could flip fewer samples (and flip
# none twice, since the assignment below is vectorised).
changing_labels_indices = np.random.RandomState(random_state).choice(
    np.arange(y.shape[0]), n_outliers, replace=False)
y[changing_labels_indices] = np.invert(y[changing_labels_indices].astype(bool)).astype(int)
results_to_csv(path, Z, y, results)
def latent_space_dimension(views_dimensions_list, R):
    """
    Return the minimal dimension of the latent space (enough to build the
    dataset) for generator_multiviews_dataset given views_dimensions_list.

    Parameters:
    -----------
    views_dimensions_list : list of int
        dimension of each view, expected sorted from highest to lowest
    R : float
        redundancy parameter, 0 <= R <= 1

    Returns:
    --------
    an int
    """
    max_view_dimension = max(views_dimensions_list)
    dimension = ceil(R * sum(views_dimensions_list))

    # The latent space must at least hold the largest view.
    if dimension < max_view_dimension:
        dimension = max_view_dimension

    reduced_dimension = dimension
    remove_sum = 0

    # After each view is built, floor(R * previous_view_dim) columns are
    # withdrawn from the pool; grow the space whenever the remaining pool
    # would be too small for the next view.
    for num_view in range(1, len(views_dimensions_list)):
        view_prec = views_dimensions_list[num_view - 1]
        view_current = views_dimensions_list[num_view]
        remove = floor(R * view_prec)
        remove_sum += remove
        if reduced_dimension - remove < view_current:
            dimension += view_current - (reduced_dimension - remove)
        reduced_dimension = dimension - remove_sum

    return dimension


def projection(latent_space, chosen_columns_list):
    """
    Return the projection of latent_space on the columns of
    chosen_columns_list (in chosen_columns_list order).

    Parameters:
    -----------
    latent_space : array
    chosen_columns_list : list

    Returns:
    --------
    an array of dimension (number of rows of latent_space,
    length of chosen_columns_list)
    """
    return latent_space[:, chosen_columns_list]


def generator_multiviews_dataset(n_samples=1000, n_views=3, n_classes=2,
                                 Z_factor=250, R=2/3, n_clusters_per_class=1,
                                 class_sep_factor=2, n_informative_divid=2,
                                 d=2, D=12, standard_deviation=2,
                                 random_state=None):
    """
    Return a generated multiview dataset.

    Parameters:
    -----------
    n_samples : int
        dataset number of samples (number of rows of dataset)
    n_views : int >= 2
        dataset number of views; one view is a set of some features
        (columns) of the latent space
    n_classes : int >= 2
        dataset number of classes
    Z_factor : float >= 1
        the minimal latent-space dimension is computed then multiplied by
        Z_factor
    R : 0 <= float <= 1
        R = 1 <> no possibility of redundancy between views
        R = 0 <> maximal possibility of redundancy between views
    n_clusters_per_class : int >= 1
    class_sep_factor : float >= 0
        class_sep = n_clusters_per_class*class_sep_factor
    n_informative_divid : float >= 1
        number of informative features = round(dimension of latent space /
        n_informative_divid); n_informative_divid = 1 <> no non-informative
        features
    d : float >= 1
        minimal dimension of views; dimensions are drawn from
        N((d+D)/2, standard_deviation^2) with d <= dimension <= D
    D : float >= d
        maximal dimension of views
    standard_deviation : float
        standard deviation of the gaussian distribution N((d+D)/2,
        standard_deviation^2)
    random_state : int or None (default None)
        seed controlling latent-space generation and column sampling;
        None keeps the previous (unseeded) behaviour

    Returns:
    --------
    Z : array of dimension (n_samples, dim_Z), the generated samples
    y : array of dimension (n_samples), integer class labels
    results : list of n_views tuples (X_v, I_v) with
        X_v = Z projected along the d_v columns in I_v
        I_v = X_v column numbers, numbered as Z columns
    unsued_dimensions_percent : percentage of unused latent-space columns
    n_informative : number of informative features
    """
    if n_views < 2:
        raise ValueError("n_views >= 2")
    if n_classes < 2:
        raise ValueError("n_classes >= 2")
    if Z_factor < 1:
        raise ValueError("Z_factor >= 1 pour le bon fonctionnement de l'algorithme")
    if (R < 0) or (R > 1):
        raise ValueError("0 <= R <= 1")
    if n_clusters_per_class < 1:
        raise ValueError("n_clusters_per_class >= 1")
    if class_sep_factor < 0:
        raise ValueError("class_sep_factor >= 0")
    if n_informative_divid < 1:
        raise ValueError("n_informative_divid >= 1")
    if d < 1:
        raise ValueError("d >= 1")
    if (d+D)/2 - 3*standard_deviation < 1:
        raise ValueError("Il faut que (d+D)/2 - 3*standard_deviation >= 1 pour avoir des valeurs positives non nulles lors de l'emploi de la loi normale")

    rng = np.random.RandomState(random_state)
    # Draw n_views view dimensions from N((d+D)/2, standard_deviation^2).
    d_v = list(rng.normal(loc=(d+D)/2, scale=standard_deviation, size=n_views))
    remove_list, add_list = [], []
    for dim_view in d_v:
        if dim_view < d or dim_view > D:  # enforce 1 <= d <= dim_view <= D
            remove_list.append(dim_view)
            add = -1
            # Rejection sampling: redraw until the value falls in [d, D].
            while add < d or add > D:
                add = rng.normal((d+D)/2, standard_deviation)
            add_list.append(add)
    d_v = [view for view in d_v if view not in remove_list] + add_list
    d_v = [int(view) for view in d_v]  # view dimensions must be integers
    # Sort view dimensions from the highest to the lowest.
    d_v.sort(reverse=True)
    # Dimension of latent space Z (multiplied by Z_factor).
    dim_Z = Z_factor * latent_space_dimension(d_v, R)
    # Number of informative features.
    n_informative = round(dim_Z / n_informative_divid)
    # Generation of latent space Z.
    Z, y = make_classification(n_samples=n_samples, n_features=dim_Z,
                               n_informative=n_informative, n_redundant=0,
                               n_repeated=0, n_classes=n_classes,
                               n_clusters_per_class=n_clusters_per_class,
                               weights=None, flip_y=0.00,
                               class_sep=n_clusters_per_class*class_sep_factor,
                               random_state=random_state)

    I_q = np.arange(Z.shape[1])  # 1D-array of Z column numbers
    meta_I_v = []
    results = []
    for view in range(n_views):
        # Draw d_v[view] column numbers uniformly from I_q without replacement.
        I_v = rng.choice(I_q, size=d_v[view], replace=False)
        meta_I_v += list(I_v)
        # Projection of Z along the columns in I_v.
        X_v = projection(Z, I_v)
        results.append((X_v, I_v))
        # Remove floor(R*d_v[view]) of the used columns from the pool, which
        # limits how much later views can overlap with this one.
        elements_to_remove = rng.choice(I_v, size=floor(R * d_v[view]),
                                        replace=False)
        I_q = np.setdiff1d(I_q, elements_to_remove)
    unsued_dimensions_list = [column for column in I_q
                              if column not in meta_I_v]
    unsued_dimensions_percent = round(
        (len(unsued_dimensions_list) / dim_Z) * 100, 2)
    return Z, y, results, unsued_dimensions_percent, n_informative


def results_to_csv(path, latent_space, integer_labels, multiviews_list):
    """
    Create len(multiviews_list) + 2 csv files at the indicated path.
    File names:
        latent_space.csv for latent_space
        integer_labels.csv for integer_labels
        view0.csv, view1.csv, ... for multiviews_list entries

    Parameters:
    -----------
    path : str
        destination directory; expected to end with a path separator
    latent_space : array
    integer_labels : 1D array
    multiviews_list : list of (data, columns) tuples

    Returns:
    --------
    None
    """
    pd.DataFrame(latent_space).to_csv(path + 'latent_space.csv', index=False)
    pd.DataFrame(integer_labels).to_csv(path + 'integer_labels.csv',
                                        index=False)
    for cpt, (view_data, view_columns) in enumerate(multiviews_list):
        pd.DataFrame(view_data, columns=view_columns).to_csv(
            path + 'view' + str(cpt) + '.csv', index=False)
def majority_list(predictions_list):
    """
    Return, for each sample, the label predicted by the most views.

    Ties are resolved in favour of the label encountered first in the
    sample's row of predictions (same behaviour as scanning for the first
    key reaching the maximal count).

    Parameters:
    -----------
    predictions_list : list of 1D array
        one prediction vector per view, all of the same length

    Returns:
    --------
    a 1D integer array of length n_samples
    """
    # One row per sample, one column per view.
    stacked_predictions = np.column_stack(predictions_list)
    majority_prediction = np.empty(stacked_predictions.shape[0], dtype=int)
    for sample, row in enumerate(stacked_predictions):
        # Counter.most_common orders equal counts by first appearance,
        # which reproduces the original explicit max-scan tie-break.
        majority_prediction[sample] = Counter(row).most_common(1)[0][0]
    return majority_prediction


def majority_score(views_dictionary, integer_labels, cv=10, classifier="SVM",
                   classifier_dictionary=None):
    """
    Return the mean and the standard deviation of the accuracy score when
    predictions are selected by majority vote over the per-view predictions.

    Parameters:
    -----------
    views_dictionary : dict
        view name -> view data array
    integer_labels : array
    cv : int
        number of cross-validation folds
    classifier : str
        key into classifier_dictionary
    classifier_dictionary : dict or None
        None (default) uses {'SVM': SVC(kernel='linear'), 'NB': GaussianNB()};
        the sentinel avoids a mutable default argument shared across calls

    Returns:
    --------
    Two floats
    """
    if classifier_dictionary is None:
        classifier_dictionary = {'SVM': SVC(kernel='linear'),
                                 'NB': GaussianNB()}
    # Provides cv train/test indices to split data in cv train/test sets.
    skf = StratifiedKFold(n_splits=cv, random_state=1, shuffle=True)
    prediction_list = [[] for _ in range(cv)]  # per-fold view predictions
    test_list = [[] for _ in range(cv)]  # per-fold expected labels

    for key in views_dictionary.keys():
        X = views_dictionary[key]
        for i, (train_index, test_index) in enumerate(
                skf.split(X, integer_labels)):
            # Split the data and labels of one view into train/test sets.
            train, test = X[train_index], X[test_index]
            y_train = integer_labels[train_index]
            y_test = integer_labels[test_index]
            # Train the classifier and predict on the test set.
            clf = classifier_dictionary[classifier]
            clf.fit(train, y_train.ravel())
            y_pred = clf.predict(test)

            prediction_list[i].append(y_pred)
            if len(test_list[i]) == 0:  # same y_test for all views
                test_list[i] = y_test

    score = []
    for i in range(len(prediction_list)):
        # Majority vote of the view predictions vs expected labels.
        y_pred_majority = majority_list(prediction_list[i])
        score.append(accuracy_score(test_list[i].ravel(), y_pred_majority))
    score = np.array(score)
    return score.mean(), score.std()


def score_one_multiview_dataset(cv=10, classifier="SVM",
                                classifier_dictionary=None,
                                n_samples=1000, n_views=3, n_classes=2,
                                Z_factor=1, R=2/3, n_clusters_per_class=2,
                                class_sep_factor=2, n_informative_divid=1,
                                d=4, D=10, standard_deviation=2):
    """
    Return 3 Series (first with dimensions of latent space, views and
    percentage of latent-space dimensions unused in views; the second with
    accuracy scores; the third with the standard deviation of accuracy
    scores) for the latent space, the views, early fusion (concatenated
    views) and late fusion (majority vote of view predictions).

    Parameters:
    -----------
    cv : int
    classifier : str
    classifier_dictionary : dict or None
        None (default) uses {'SVM': SVC(kernel='linear'), 'NB': GaussianNB()}
    n_samples, n_views, n_classes, Z_factor, R, n_clusters_per_class,
    class_sep_factor, n_informative_divid, d, D, standard_deviation :
        parameters of generator_multiviews_dataset

    Returns:
    --------
    3 Series
    """
    if classifier_dictionary is None:
        classifier_dictionary = {'SVM': SVC(kernel='linear'),
                                 'NB': GaussianNB()}
    # Dimensions of latent space and views, plus unused-dimension percentage.
    dimensions = {'unsued dimension of latent space': 0,
                  "number of informative features": 0, 'latent space': 0}
    dimensions.update({'view'+str(i): 0 for i in range(n_views)})
    # Mean of accuracy scores.
    dict_scores_means = {'latent space': 0}
    dict_scores_means.update({'view'+str(i): 0 for i in range(n_views)})
    dict_scores_means.update({'early fusion': 0, 'late fusion': 0})
    # Standard deviation of accuracy scores.
    dict_scores_std = {'latent space': []}
    dict_scores_std.update({'view'+str(i): [] for i in range(n_views)})
    dict_scores_std.update({'early fusion': [], 'late fusion': []})
    # Data of each view.
    dict_views = {'view'+str(i): 0 for i in range(n_views)}

    Z, y, multiviews_list, unsued_dimensions_percent, n_informative = \
        generator_multiviews_dataset(n_samples, n_views, n_classes, Z_factor,
                                     R, n_clusters_per_class,
                                     class_sep_factor, n_informative_divid,
                                     d, D, standard_deviation)
    dimensions["unsued dimension of latent space"] = unsued_dimensions_percent
    dimensions["number of informative features"] = n_informative
    dimensions["latent space"] = Z.shape

    for i in range(n_views):
        # multiviews_list[i] = (data of view i, column numbers of view i)
        dict_views['view'+str(i)] = multiviews_list[i][0]
        dimensions['view'+str(i)] = multiviews_list[i][0].shape

    # Early fusion = concatenation of all views.
    early_fusion = np.concatenate(
        [dict_views[key] for key in dict_views.keys()], axis=1)
    dict_data_df = {'latent space': Z}
    dict_data_df.update({'view'+str(i): dict_views['view'+str(i)]
                         for i in range(n_views)})
    dict_data_df.update({'early fusion': early_fusion})

    for key in dict_data_df.keys():
        clf = classifier_dictionary[classifier]
        score = cross_val_score(clf, dict_data_df[key], y,
                                scoring='accuracy', cv=cv)
        dict_scores_means[key] = score.mean()
        dict_scores_std[key] = score.std()

    mean_majority, std_majority = majority_score(dict_views, y, cv,
                                                 classifier,
                                                 classifier_dictionary)
    dict_scores_means['late fusion'] = mean_majority
    dict_scores_std['late fusion'] = std_majority

    return (pd.Series(dimensions), pd.Series(dict_scores_means),
            pd.Series(dict_scores_std))


def score_multiviews_n_samples(n_samples_list, path_graph, cv=10,
                               classifier="SVM", classifier_dictionary=None,
                               n_views=3, n_classes=2, Z_factor=1, R=2/3,
                               n_clusters_per_class=2, class_sep_factor=2,
                               n_informative_divid=1, d=4, D=10,
                               standard_deviation=2):
    """
    Return 2 DataFrames (accuracy score means and standard deviations) for
    the latent space, views, early fusion and late fusion, indexed by
    n_samples_list, for the indicated classifier.
    Create and save (at path_graph) a graph of accuracy score (with
    confidence interval) vs n_samples_list.

    Parameters:
    -----------
    n_samples_list : list
        each element defines a new dataset with that many samples;
        sorted in place in ascending order
    path_graph : str
        path to save graphics
    cv : int
    classifier : str
    classifier_dictionary : dict or None
        None (default) uses {'SVM': SVC(kernel='linear'), 'NB': GaussianNB()}
    n_views, n_classes, Z_factor, R, n_clusters_per_class, class_sep_factor,
    n_informative_divid, d, D, standard_deviation :
        parameters of generator_multiviews_dataset

    Returns:
    --------
    2 DataFrames with n_samples_list as index
    """
    if classifier_dictionary is None:
        classifier_dictionary = {'SVM': SVC(kernel='linear'),
                                 'NB': GaussianNB()}
    # Sample counts from the lowest to the highest.
    n_samples_list.sort(reverse=False)
    # Percentage of unused latent-space columns per dataset.
    unsued_dimensions_percent_list = []
    # Number of informative features per dataset.
    n_informative_list = []
    # Mean of accuracy scores per n_samples.
    dict_scores_means = {'latent space': []}
    dict_scores_means.update({'view'+str(i): [] for i in range(n_views)})
    dict_scores_means.update({'early fusion': [], 'late fusion': []})
    # Standard deviation of accuracy scores per n_samples.
    dict_scores_std = {'latent space': []}
    dict_scores_std.update({'view'+str(i): [] for i in range(n_views)})
    dict_scores_std.update({'early fusion': [], 'late fusion': []})
    # Data of each view.
    dict_views = {'view'+str(i): 0 for i in range(n_views)}

    for n_samples in n_samples_list:
        Z, y, multiviews_list, unsued_dimensions_percent, n_informative = \
            generator_multiviews_dataset(n_samples, n_views, n_classes,
                                         Z_factor, R, n_clusters_per_class,
                                         class_sep_factor,
                                         n_informative_divid, d, D,
                                         standard_deviation)
        unsued_dimensions_percent_list.append(unsued_dimensions_percent)
        n_informative_list.append(n_informative)

        for i in range(n_views):
            # multiviews_list[i] = (data of view i, column numbers of view i)
            dict_views['view'+str(i)] = multiviews_list[i][0]

        # Early fusion = concatenation of all views.
        early_fusion = np.concatenate(
            [dict_views[key] for key in dict_views.keys()], axis=1)
        dict_data = {'latent space': Z}
        dict_data.update({'view'+str(i): dict_views['view'+str(i)]
                          for i in range(n_views)})
        dict_data.update({'early fusion': early_fusion})

        for key in dict_data.keys():
            clf = classifier_dictionary[classifier]
            score = cross_val_score(clf, dict_data[key], y,
                                    scoring='accuracy', cv=cv)
            dict_scores_means[key].append(score.mean())
            dict_scores_std[key].append(score.std())

        mean_majority, std_majority = majority_score(dict_views, y, cv,
                                                     classifier,
                                                     classifier_dictionary)
        dict_scores_means['late fusion'].append(mean_majority)
        dict_scores_std['late fusion'].append(std_majority)

    df_scores_means = pd.DataFrame(dict_scores_means, index=n_samples_list)
    df_scores_std = pd.DataFrame(dict_scores_std, index=n_samples_list)

    plt.figure()
    for key in dict_scores_means.keys():
        # 1.96/sqrt(cv) gives an approximate 95% confidence interval.
        plt.errorbar(n_samples_list, dict_scores_means[key],
                     1.96*np.array(dict_scores_std[key])/sqrt(cv), label=key)
    # Index and label for the graphic.
    label_index = []
    for n_samples, percent, n_informative in zip(
            n_samples_list, unsued_dimensions_percent_list,
            n_informative_list):
        label_index.append(str(n_samples)+'\n'+str(percent)+'\n'
                           + str(n_informative))

    plt.xticks(n_samples_list, label_index, fontsize='medium',
               multialignment='center')
    plt.xlabel("Number of samples\nPercentage of dimensions of latent space unsued in views\nNumber of informative features")
    plt.ylabel("Accuracy score for "+classifier)
    plt.legend(bbox_to_anchor=(1.04,0.5), loc="center left", borderaxespad=0)
    plt.title("number of views = "+str(n_views)+" - R = "+str(round(R, 4))+"\nfactor of latent space dimension = "+str(Z_factor)+" - number of classes = "+str(n_classes)+"\nAccuracy score vs number of samples for classifier "+classifier)
    plt.savefig(path_graph+"score_samples_"+str(n_views)+"_"+classifier+".png", bbox_inches='tight')
    plt.show()
    plt.close()

    return df_scores_means, df_scores_std
def graph_comparaison_classifier_scores_n_samples(classifier1, classifier2,
                                                  n_samples_list, path_graph,
                                                  cv=10,
                                                  classifier_dictionary=None,
                                                  n_views=3, n_classes=2,
                                                  Z_factor=1, R=2/3,
                                                  n_clusters_per_class=2,
                                                  class_sep_factor=2,
                                                  n_informative_divid=1,
                                                  d=4, D=10,
                                                  standard_deviation=2):
    """
    Create and save (at path_graph) one graph per column of the result of
    score_multiviews_n_samples, plotting the scores of classifier2 against
    the scores of classifier1.

    Parameters:
    -----------
    classifier1 : str
    classifier2 : str
    n_samples_list : list
        each element defines a new dataset with that many samples
    path_graph : str
        path to save graphics
    cv : int
    classifier_dictionary : dict or None
        None (default) uses {'SVM': SVC(kernel='linear'), 'NB': GaussianNB()};
        the sentinel avoids a mutable default argument shared across calls
    n_views, n_classes, Z_factor, R, n_clusters_per_class, class_sep_factor,
    n_informative_divid, d, D, standard_deviation :
        parameters of generator_multiviews_dataset

    Returns:
    --------
    None
    """
    if classifier_dictionary is None:
        classifier_dictionary = {'SVM': SVC(kernel='linear'),
                                 'NB': GaussianNB()}
    df_scores_clf1_means, df_scores_clf1_std = score_multiviews_n_samples(
        n_samples_list, path_graph, cv, classifier1, classifier_dictionary,
        n_views, n_classes, Z_factor, R, n_clusters_per_class,
        class_sep_factor, n_informative_divid, d, D, standard_deviation)
    df_scores_clf2_means, df_scores_clf2_std = score_multiviews_n_samples(
        n_samples_list, path_graph, cv, classifier2, classifier_dictionary,
        n_views, n_classes, Z_factor, R, n_clusters_per_class,
        class_sep_factor, n_informative_divid, d, D, standard_deviation)

    keys = df_scores_clf1_means.keys()

    for key in keys:
        plt.figure()
        plt.scatter(df_scores_clf1_means[key].values,
                    df_scores_clf2_means[key].values,
                    c=df_scores_clf1_means[key].values)
        plt.plot([0.0, 1.1], [0.0, 1.1], "--", c=".7")  # diagonal
        plt.xlabel("Accuracy score for "+classifier1)
        plt.ylabel("Accuracy score for "+classifier2)
        plt.xlim(0, 1)
        plt.ylim(0, 1)
        plt.title("number of views = "+str(n_views)+" - R = "+str(round(R, 4))+" - number of classes = "+str(n_classes)+"\nAccuracy score of "+key+" for "+classifier2+" vs "+classifier1)
        plt.savefig(path_graph+classifier1+"_"+classifier2+"_"+str(n_views)+"_"+key+".png")
        plt.show()
        plt.close()


def score_multiviews_R(R_list, path_graph, cv=10, classifier="SVM",
                       classifier_dictionary=None,
                       n_samples=1000, n_views=3, n_classes=2, Z_factor=1,
                       n_clusters_per_class=2, class_sep_factor=2,
                       n_informative_divid=1, d=4, D=10,
                       standard_deviation=2):
    """
    Return 2 DataFrames (accuracy score means and standard deviations) for
    the latent space, views, early fusion (concatenated views) and late
    fusion (majority vote of view predictions), indexed by R_list, for the
    indicated classifier.
    Create and save (at path_graph) a graph of accuracy score (with
    confidence interval) vs R_list.

    Parameters:
    -----------
    R_list : list
        each element defines a new dataset with that element as R;
        sorted in place in ascending order
    path_graph : str
        path to save graphics
    cv : int
    classifier : str
    classifier_dictionary : dict or None
        None (default) uses {'SVM': SVC(kernel='linear'), 'NB': GaussianNB()}
    n_samples, n_views, n_classes, Z_factor, n_clusters_per_class,
    class_sep_factor, n_informative_divid, d, D, standard_deviation :
        parameters of generator_multiviews_dataset

    Returns:
    --------
    2 DataFrames with R_list as index
    """
    if classifier_dictionary is None:
        classifier_dictionary = {'SVM': SVC(kernel='linear'),
                                 'NB': GaussianNB()}
    # R values from the lowest to the highest.
    R_list.sort(reverse=False)
    # Percentage of unused latent-space columns per dataset.
    unsued_dimensions_percent_list = []
    # Number of informative features per dataset.
    n_informative_list = []
    # Mean of accuracy scores per R.
    dict_scores_means = {'latent space': []}
    dict_scores_means.update({'view'+str(i): [] for i in range(n_views)})
    dict_scores_means.update({'early fusion': [], 'late fusion': []})
    # Standard deviation of accuracy scores per R.
    dict_scores_std = {'latent space': []}
    dict_scores_std.update({'view'+str(i): [] for i in range(n_views)})
    dict_scores_std.update({'early fusion': [], 'late fusion': []})
    # Data of each view.
    dict_views = {'view'+str(i): 0 for i in range(n_views)}

    for R in R_list:
        Z, y, multiviews_list, unsued_dimensions_percent, n_informative = \
            generator_multiviews_dataset(n_samples, n_views, n_classes,
                                         Z_factor, R, n_clusters_per_class,
                                         class_sep_factor,
                                         n_informative_divid, d, D,
                                         standard_deviation)
        unsued_dimensions_percent_list.append(unsued_dimensions_percent)
        n_informative_list.append(n_informative)

        for i in range(n_views):
            # multiviews_list[i] = (data of view i, column numbers of view i)
            dict_views['view'+str(i)] = multiviews_list[i][0]

        # Early fusion = concatenation of all views.
        early_fusion = np.concatenate(
            [dict_views[key] for key in dict_views.keys()], axis=1)
        dict_data_df = {'latent space': Z}
        dict_data_df.update({'view'+str(i): dict_views['view'+str(i)]
                             for i in range(n_views)})
        dict_data_df.update({'early fusion': early_fusion})

        for key in dict_data_df.keys():
            clf = classifier_dictionary[classifier]
            score = cross_val_score(clf, dict_data_df[key], y,
                                    scoring='accuracy', cv=cv)
            dict_scores_means[key].append(score.mean())
            dict_scores_std[key].append(score.std())

        mean_majority, std_majority = majority_score(dict_views, y, cv,
                                                     classifier,
                                                     classifier_dictionary)
        dict_scores_means['late fusion'].append(mean_majority)
        dict_scores_std['late fusion'].append(std_majority)

    df_scores_means = pd.DataFrame(dict_scores_means, index=R_list)
    df_scores_std = pd.DataFrame(dict_scores_std, index=R_list)

    plt.figure()
    for key in dict_scores_means.keys():
        # 1.96/sqrt(cv) gives an approximate 95% confidence interval.
        plt.errorbar(R_list, dict_scores_means[key],
                     1.96*np.array(dict_scores_std[key])/sqrt(cv), label=key)
    # Index and label for the graphic; only every 4th tick to avoid clutter.
    label_index = []
    R_label = []
    for i in range(0, len(R_list), 4):
        R_label.append(R_list[i])
        label_index.append(str(round(R_list[i], 2))+'\n'
                           + str(unsued_dimensions_percent_list[i])+'\n'
                           + str(n_informative_list[i]))

    plt.xticks(R_label, label_index, fontsize='medium',
               multialignment='center')
    plt.xlabel("R\nPercentage of dimensions of latent space unsued in views\nNumber of informative features")
    plt.ylabel("Accuracy score for "+classifier)
    plt.legend(bbox_to_anchor=(1.04,0.5), loc="center left", borderaxespad=0)
    plt.title("number of views = "+str(n_views)+" - number of samples = "+str(n_samples)+"\nfactor of latent space dimension = "+str(Z_factor)+" - number of classes = "+str(n_classes)+"\nAccuracy score vs R for classifier "+classifier)
    plt.savefig(path_graph+"score_R_"+str(n_views)+"_"+str(n_samples)+"_"+str(Z_factor)+"_"+classifier+".png", bbox_inches='tight')
    plt.show()
    plt.close()

    return df_scores_means, df_scores_std
list of Z dimension for each Z_factor_list item + Z_dim_list = [] + # list of percentage of unsued columns of latent space in views + unsued_dimensions_percent_list = [] + # list of number of informative features of latent space + n_informative_list = [] + # same views have same colors on each graph + dict_colors = {'latent space':0} + dict_colors.update({'view'+str(i):0 for i in range(n_views)}) + prop_cycle = plt.rcParams['axes.prop_cycle'] + colors = prop_cycle.by_key()['color'] + for key, c in zip(dict_colors.keys(), colors): + dict_colors[key] = c + dict_colors.update({'early fusion':'purple', 'late fusion':'maroon'}) + # dictionary contains mean of accuracy scores per Z_factor + dict_scores_means = {'latent space':[]} + dict_scores_means.update({'view'+str(i):[] for i in range(n_views)}) + dict_scores_means.update({'early fusion':[], 'late fusion':[]}) + # dictionary contains error rate per Z_factor + dict_scores_error = {'latent space':[]} + dict_scores_error.update({'view'+str(i):[] for i in range(n_views)}) + dict_scores_error.update({'early fusion':[], 'late fusion':[]}) + # dictionary contains standard deviation of accuracy scores per Z_factor + dict_scores_std = {'latent space':[]} + dict_scores_std.update({'view'+str(i):[] for i in range(n_views)}) + dict_scores_std.update({'early fusion':[], 'late fusion':[]}) + # dictionary contains data of each view + dict_views = {'view'+str(i):0 for i in range(n_views)} + + for Z_factor in Z_factor_list: + Z, y, multiviews_list, unsued_dimensions_percent, n_informative = generator_multiviews_dataset(n_samples, n_views, n_classes, Z_factor, R, n_clusters_per_class, class_sep_factor, n_informative_divid, d, D, standard_deviation) + unsued_dimensions_percent_list.append(unsued_dimensions_percent) + n_informative_list.append(n_informative) + + for i in range(n_views): + # multiviews_list[i] = (columns / data of view i, numeros of columns of view i) + dict_views['view'+str(i)] = multiviews_list[i][0] + + early_fusion = 
np.concatenate([dict_views[key] for key in dict_views.keys()], axis=1) + # = concatenation of all views + # dimension = number of columns + d_v.append(early_fusion.shape[1]) + Z_dim_list.append(Z.shape[1]) + # dictionary of data + dict_data_df = {'latent space':Z} + dict_data_df.update({'view'+str(i):dict_views['view'+str(i)] for i in range(n_views)}) + dict_data_df.update({'early fusion':early_fusion}) + + for key in dict_data_df.keys(): + clf = classifier_dictionary[classifier] + score = cross_val_score(clf, dict_data_df[key], y, scoring='accuracy', cv=cv) + dict_scores_means[key].append(score.mean()) + dict_scores_error[key].append(1 - score.mean()) + dict_scores_std[key].append(score.std()) + + mean_majority, std_majority = majority_score(dict_views, y, cv, classifier, classifier_dictionary) + dict_scores_means['late fusion'].append(mean_majority) + dict_scores_error['late fusion'].append(1 - mean_majority) + dict_scores_std['late fusion'].append(std_majority) + + d_v_divid_Z = np.divide(np.array(d_v), np.array(Z_dim_list)) + + df_scores_means = pd.DataFrame(dict_scores_means, index=d_v_divid_Z) + df_scores_error = pd.DataFrame(dict_scores_error, index=d_v_divid_Z) + df_scores_std = pd.DataFrame(dict_scores_std, index=d_v_divid_Z) + + # index and label for graphics + label_index = [chr(i) for i in range(ord('a'),ord('z')+1)] + label_index = label_index[0:len(d_v)] + label_value = "" + for label, v_Z, dim_v, dim_Z, Z_factor, percent, n_informative in zip(label_index, d_v_divid_Z, d_v, Z_dim_list, Z_factor_list, unsued_dimensions_percent_list, n_informative_list): + label_value = label_value + label+" : V/Z = "+str(round(v_Z, 4))+", V = "+str(dim_v)+", Z = "+str(dim_Z)+", Z_factor = "+str(Z_factor)+", % ="+str(percent)+", n_informative = "+str(n_informative)+'\n' + + x_label = "V/Z = sum of views dimension divided by latent space dimension with :\nV = sum of views dimension\nZ = latent space dimension multiplied by Z_factor\n% = percentage of dimensions of latent 
space unsued in views\nn_informative = number of informative features" + + plt.figure(figsize=(10, 10)) # accuracy score vs d_v_divid_Z + for key in dict_scores_means.keys(): + plt.semilogx(d_v_divid_Z, dict_scores_means[key], '.-', color=dict_colors[key], label=key) + + plt.xticks(d_v_divid_Z, label_index, fontsize='medium', multialignment='center') # new x indexes + plt.text(plt.xlim()[1]+0.05, plt.ylim()[1]-(plt.ylim()[1]-plt.ylim()[0])/2, label_value) + plt.xlabel(x_label) + plt.ylabel("Accuracy score for "+classifier) + plt.legend(bbox_to_anchor=(1.04, 1), loc="center left", borderaxespad=0) + plt.title("number of views = "+str(n_views)+" - number of samples = "+str(n_samples)+"\nR = "+str(round(R, 4))+" - number of classes = "+str(n_classes)+"\nAccuracy score vs ratio sum of views dimension / latent space dimension for classifier "+classifier) + plt.savefig(path_graph+"score_Z_factor_"+str(n_views)+"_"+str(n_samples)+"_"+classifier+".png", bbox_inches='tight') + plt.show() + plt.close() + + plt.figure(figsize=(10, 10)) # error rate vs d_v_divid_Z + for key in dict_scores_means.keys(): + plt.semilogx(d_v_divid_Z, dict_scores_error[key], '.-', color=dict_colors[key], label=key) + plt.xticks(d_v_divid_Z, label_index, fontsize='medium', multialignment='center') # new x indexes + plt.text(plt.xlim()[1]+0.05, plt.ylim()[1]-(plt.ylim()[1]-plt.ylim()[0])/2, label_value) + plt.xlabel(x_label) + plt.ylabel("Error rate for "+classifier) + plt.legend(bbox_to_anchor=(1.04, 1), loc="center left", borderaxespad=0) + plt.title("number of views = "+str(n_views)+" - number of samples = "+str(n_samples)+"\nR = "+str(round(R, 4))+" - number of classes = "+str(n_classes)+"\nError rate vs ratio sum of views dimension / latent space dimension for classifier "+classifier) + plt.savefig(path_graph+"error_Z_factor_"+str(n_views)+"_"+str(n_samples)+"_"+classifier+".png", bbox_inches='tight') + plt.show() + plt.close() + """ + plt.figure(figsize=(10, 10)) + + for key in 
dict_scores_means.keys(): + plt.errorbar(d_v_divid_Z, dict_scores_means[key], 1.96*np.array(dict_scores_std[key])/sqrt(cv), label=key) + plt.xticks(d_v_divid_Z, label_index, fontsize='medium', multialignment='center') + plt.text(plt.xlim()[1]+0.05, plt.ylim()[1]-(plt.ylim()[1]-plt.ylim()[0])/2, label_value) + plt.xlabel(x_label) + plt.ylabel("Accuracy score for "+classifier) + plt.legend(bbox_to_anchor=(1.04, 1), loc="center left", borderaxespad=0) + plt.title("number of views = "+str(n_views)+" - R = "+str(round(R, 4))+"\nAccuracy score vs ratio sum of views dimension / latent space dimension for classifier "+classifier) + plt.savefig(path_graph+"score_Z_factor_errorbar_"+str(n_views)+"_"+classifier+".png", bbox_inches='tight') + plt.show() + plt.close() + + plt.figure(figsize=(10, 10)) # accuracy score of early fusion divided by + # accuracy score of each view vs d_v_divid_Z + for view in dict_views.keys(): + plt.semilogx(d_v_divid_Z, dict_scores_means['early fusion']/df_scores_means[view], '.-', label='early fusion score divided by '+view+' score') + plt.xticks(d_v_divid_Z, label_index, fontsize='medium', multialignment='center') # new x indexes + plt.text(plt.xlim()[1]+0.05, plt.ylim()[1]-(plt.ylim()[1]-plt.ylim()[0])/2, label_value) + plt.xlabel(x_label) + plt.ylabel("Ratio accuracy score for early fusion / accuracy score for each view for "+classifier) + plt.legend(bbox_to_anchor=(1.04, 1), loc="center left", borderaxespad=0) + plt.title("number of views = "+str(n_views)+" - R = "+str(round(R, 4))+"\nRatio accuracy score for early fusion / accuracy score for each view \nvs ratio sum of views dimension / latent space dimension for classifier "+classifier) + plt.savefig(path_graph+"score_Z_factor_majority_view_divid_"+str(n_views)+"_"+classifier+".png", bbox_inches='tight') + plt.show() + plt.close() + + plt.figure(figsize=(10, 10)) # accuracy score of late fusion divided by + # accuracy score of each view vs d_v_divid_Z + for view in dict_views.keys(): + 
plt.semilogx(d_v_divid_Z, dict_scores_means['late fusion']/df_scores_means[view], '.-', label='late fusion score divided by '+view+' score') + plt.xticks(d_v_divid_Z, label_index, fontsize='medium', multialignment='center') # new x indexes + plt.text(plt.xlim()[1]+0.05, plt.ylim()[1]-(plt.ylim()[1]-plt.ylim()[0])/2, label_value) + plt.xlabel(x_label) + plt.ylabel("Ratio accuracy score for late fusion / accuracy score for each view for "+classifier) + plt.legend(bbox_to_anchor=(1.04, 1), loc="center left", borderaxespad=0) + plt.title("number of views = "+str(n_views)+" - R = "+str(round(R, 4))+"\nRation accuracy score for late fusion / accuracy score for each view \nvs ratio sum of views dimension / latent space dimension for classifier "+classifier) + plt.savefig(path_graph+"score_Z_factor_all_view_divid_"+str(n_views)+"_"+classifier+".png", bbox_inches='tight') + plt.show() + plt.close() + """ + return df_scores_means, df_scores_std, df_scores_error + + +def score_multiviews_Z_factor_Mumbo(Z_factor_list, path_graph, cv=10, classifier="SVM", + classifier_dictionary={'SVM':SVC(kernel='linear'), 'NB':GaussianNB()}, + n_samples=1000, n_views=3, n_classes=2, + R=2/3, n_clusters_per_class=2, + class_sep_factor=2, n_informative_divid=1, + d=4, D=10, standard_deviation=2): + """ + Returns 3 DataFrames (first with accuracy score, the second with the + standard deivation of accuracy score and the third with the error rate) of + latent space, views, early fusion predictions (concatenate views + predictions) and late fusion predictions (majority views predictions) with + sum of views dimension divided by Z_factor_list as index for the indicated + classifier and for Mumbo classifier + Creates and saves (at the indicated path path_graph) a graph represented + accuracy score vs sum of views dimension divided by Z_factor_list and a + graph represented error rate (1 - accuracy score) vs sum of views dimension + divided by Z_factor_list + + Parameters: + ----------- + 
Z_factor_list : list + each element from Z_factor_list defines a new dataset with + element as Z_factor + path_graph : str + path to save graphics + cv : int + classifier : str + classifier_dictionary : dict + n_samples, n_views, n_classes, R, n_clusters_per_class, class_sep_factor, + n_informative_divid, d, D, standard_deviation : parameters of generator_multiviews_dataset + + Returns: + -------- + 3 DataFrames with Z_factor_list as index + """ + # Z_factor_list = list of diverse values of Z_factor from the highest to the lowest + Z_factor_list.sort(reverse=True) + # list of sum of views dimension for each Z_factor_list item + d_v = [] + # list of Z dimension for each Z_factor_list item + Z_dim_list = [] + # list of percentage of unsued columns of latent space in views + unsued_dimensions_percent_list = [] + # list of number of informative features of latent space + n_informative_list = [] + # same views have same colors on each graph + dict_colors = {'latent space':0} + dict_colors.update({'view'+str(i):0 for i in range(n_views)}) + prop_cycle = plt.rcParams['axes.prop_cycle'] + colors = prop_cycle.by_key()['color'] + for key, c in zip(dict_colors.keys(), colors): + dict_colors[key] = c + dict_colors.update({'early fusion':'purple', 'late fusion':'maroon', 'Mumbo':'midnightblue'}) + # dictionary contains mean of accuracy scores per Z_factor + dict_scores_means = {'latent space':[]} + dict_scores_means.update({'view'+str(i):[] for i in range(n_views)}) + dict_scores_means.update({'early fusion':[], 'late fusion':[], 'Mumbo':[]}) + # dictionary contains error rate per Z_factor + dict_scores_error = {'latent space':[]} + dict_scores_error.update({'view'+str(i):[] for i in range(n_views)}) + dict_scores_error.update({'early fusion':[], 'late fusion':[], 'Mumbo':[]}) + # dictionary contains standard deviation of accuracy scores per Z_factor + dict_scores_std = {'latent space':[]} + dict_scores_std.update({'view'+str(i):[] for i in range(n_views)}) + 
dict_scores_std.update({'early fusion':[], 'late fusion':[], 'Mumbo':[]}) + # dictionary contains data of each view + dict_views = {'view'+str(i):0 for i in range(n_views)} + + for Z_factor in Z_factor_list: + Z, y, multiviews_list, unsued_dimensions_percent, n_informative = generator_multiviews_dataset(n_samples, n_views, n_classes, Z_factor, R, n_clusters_per_class, class_sep_factor, n_informative_divid, d, D, standard_deviation) + unsued_dimensions_percent_list.append(unsued_dimensions_percent) + n_informative_list.append(n_informative) + + view_index = [0] # for Mumbo + for i in range(n_views): + # multiviews_list[i] = (columns / data of view i, numeros of columns of view i) + dict_views['view'+str(i)] = multiviews_list[i][0] + view_index.append(len(multiviews_list[i][1])+view_index[i]) + + concat = np.concatenate([dict_views[key] for key in dict_views.keys()], axis=1) + # = concatenation of all views + # dimension = number of columns + d_v.append(concat.shape[1]) + Z_dim_list.append(Z.shape[1]) + # dictionary of data + dict_data_df = {'latent space':Z} + dict_data_df.update({'view'+str(i):dict_views['view'+str(i)] for i in range(n_views)}) + dict_data_df.update({'early fusion':concat}) + + for key in dict_data_df.keys(): + clf = classifier_dictionary[classifier] + score = cross_val_score(clf, dict_data_df[key], y, scoring='accuracy', cv=cv) + dict_scores_means[key].append(score.mean()) + dict_scores_error[key].append(1 - score.mean()) + dict_scores_std[key].append(score.std()) + + mean_majority, std_majority = majority_score(dict_views, y, cv, classifier, classifier_dictionary) + dict_scores_means['late fusion'].append(mean_majority) + dict_scores_error['late fusion'].append(1 - mean_majority) + dict_scores_std['late fusion'].append(std_majority) + # Mumbo + skf = StratifiedKFold(n_splits=cv, random_state=1, shuffle=True) + # provides cv train/test indices to split data in cv train/test sets + score = [] + for train_index, test_index in skf.split(concat, y): + 
# splits data and integer label of one view in test and train sets + train, test = concat[train_index], concat[test_index] + y_train, y_test = y[train_index], y[test_index] + # trains the classifier and tests it with test set + clf = MumboClassifier() + clf.fit(train, y_train, view_index) + y_pred = clf.predict(test) + score.append(accuracy_score(y_test, y_pred)) + + score = np.array(score) + dict_scores_means['Mumbo'].append(score.mean()) + dict_scores_error['Mumbo'].append(1 - score.mean()) + dict_scores_std['Mumbo'].append(score.std()) + + d_v_divid_Z = np.divide(np.array(d_v), np.array(Z_dim_list)) + + df_scores_means = pd.DataFrame(dict_scores_means, index=d_v_divid_Z) + df_scores_error = pd.DataFrame(dict_scores_error, index=d_v_divid_Z) + df_scores_std = pd.DataFrame(dict_scores_std, index=d_v_divid_Z) + + # index and label for graphics + label_index = [chr(i) for i in range(ord('a'),ord('z')+1)] + label_index = label_index[0:len(d_v)] + label_value = "" + for label, v_Z, dim_v, dim_Z, Z_factor, percent, n_informative in zip(label_index, d_v_divid_Z, d_v, Z_dim_list, Z_factor_list, unsued_dimensions_percent_list, n_informative_list): + label_value = label_value + label+" : V/Z = "+str(round(v_Z, 4))+", V = "+str(dim_v)+", Z = "+str(dim_Z)+", Z_factor = "+str(Z_factor)+", % ="+str(percent)+", n_informative = "+str(n_informative)+'\n' + + x_label = "V/Z = sum of views dimension divided by latent space dimension with :\nV = sum of views dimension\nZ = latent space dimension multiplied by Z_factor\n% = percentage of dimensions of latent space unsued in views\nn_informative = number of informative features" + + plt.figure(figsize=(10, 10)) # accuracy score vs d_v_divid_Z + for key in dict_scores_means.keys(): + plt.semilogx(d_v_divid_Z, dict_scores_means[key], '.-', color=dict_colors[key], label=key) + + plt.xticks(d_v_divid_Z, label_index, fontsize='medium', multialignment='center') # new x indexes + plt.text(plt.xlim()[1]+0.05, 
plt.ylim()[1]-(plt.ylim()[1]-plt.ylim()[0])/2, label_value) + plt.xlabel(x_label) + plt.ylabel("Accuracy score for "+classifier+" and Mumbo") + plt.legend(bbox_to_anchor=(1.04, 1), loc="center left", borderaxespad=0) + plt.title("number of views = "+str(n_views)+" - number of samples = "+str(n_samples)+"\nR = "+str(round(R, 4))+" - number of classes = "+str(n_classes)+"\nAccuracy score vs ratio sum of views dimension / latent space dimension for classifiers "+classifier+" and Mumbo") + plt.savefig(path_graph+"score_Z_factor_"+str(n_views)+"_"+str(n_samples)+"_Mumbo_"+classifier+".png", bbox_inches='tight') + plt.show() + plt.close() + + plt.figure(figsize=(10, 10)) # error rate vs d_v_divid_Z + for key in dict_scores_means.keys(): + plt.semilogx(d_v_divid_Z, dict_scores_error[key], '.-', color=dict_colors[key], label=key) + plt.xticks(d_v_divid_Z, label_index, fontsize='medium', multialignment='center') # new x indexes + plt.text(plt.xlim()[1]+0.05, plt.ylim()[1]-(plt.ylim()[1]-plt.ylim()[0])/2, label_value) + plt.xlabel(x_label) + plt.ylabel("Error rate for "+classifier+" and Mumbo") + plt.legend(bbox_to_anchor=(1.04, 1), loc="center left", borderaxespad=0) + plt.title("number of views = "+str(n_views)+" - number of samples = "+str(n_samples)+"\nR = "+str(round(R, 4))+" - number of classes = "+str(n_classes)+"\nError rate vs ratio sum of views dimension / latent space dimension for classifiers "+classifier+" and Mumbo") + plt.savefig(path_graph+"error_Z_factor_"+str(n_views)+"_"+str(n_samples)+"_Mumbo_"+classifier+".png", bbox_inches='tight') + plt.show() + plt.close() + """ + plt.figure(figsize=(10, 10)) # accuracy score of early fusion divided by + # accuracy score of each view vs d_v_divid_Z + for view in dict_views.keys(): + plt.semilogx(d_v_divid_Z, dict_scores_means['early fusion']/df_scores_means[view], '.-', label='early fusion score divided by '+view+' score') + plt.xticks(d_v_divid_Z, label_index, fontsize='medium', multialignment='center') # new x 
indexes + plt.text(plt.xlim()[1]+0.05, plt.ylim()[1]-(plt.ylim()[1]-plt.ylim()[0])/2, label_value) + plt.xlabel(x_label) + plt.ylabel("Ratio accuracy score for early fusion / accuracy score for each view for "+classifier+" and Mumbo") + plt.legend(bbox_to_anchor=(1.04, 1), loc="center left", borderaxespad=0) + plt.title("number of views = "+str(n_views)+" - R = "+str(round(R, 4))+"\nRatio accuracy score for early fusion / accuracy score for each view \nvs ratio sum of views dimension / latent space dimension for classifiers "+classifier+" and Mumbo") + plt.savefig(path_graph+"score_Z_factor_majority_view_divid_"+str(n_views)+"_Mumbo_"+classifier+".png", bbox_inches='tight') + plt.show() + plt.close() + + plt.figure(figsize=(10, 10)) # accuracy score of late fusion divided by + # accuracy score of each view vs d_v_divid_Z + for view in dict_views.keys(): + plt.semilogx(d_v_divid_Z, dict_scores_means['late fusion']/df_scores_means[view], '.-', label='late fusion score divided by '+view+' score') + plt.xticks(d_v_divid_Z, label_index, fontsize='medium', multialignment='center') # new x indexes + plt.text(plt.xlim()[1]+0.05, plt.ylim()[1]-(plt.ylim()[1]-plt.ylim()[0])/2, label_value) + plt.xlabel(x_label) + plt.ylabel("Ratio accuracy score for late fusion / accuracy score for each view for "+classifier+" and Mumbo") + plt.legend(bbox_to_anchor=(1.04, 1), loc="center left", borderaxespad=0) + plt.title("number of views = "+str(n_views)+" - R = "+str(round(R, 4))+"\nRation accuracy score for late fusion / accuracy score for each view \nvs ratio sum of views dimension / latent space dimension for classifiers "+classifier+" and Mumbo") + plt.savefig(path_graph+"score_Z_factor_all_view_divid_"+str(n_views)+"_"+str(round(R, 4))+"_Mumbo_"+classifier+".png", bbox_inches='tight') + plt.show() + plt.close() + """ + return df_scores_means, df_scores_std, df_scores_error + + +def score_multiviews_n_views_R(n_views_list, R_list, path_graph, cv=10, + classifier="SVM", 
classifier_dictionary={'SVM':SVC(kernel='linear'), 'NB':GaussianNB()}, + n_samples=1000, n_classes=2, Z_factor=1, + n_clusters_per_class=2, class_sep_factor=2, + n_informative_divid=1, d=4, D=10, standard_deviation=2): + """ + Returns a dictionary with n_views_list as key containing a list of + DataFrames (represented accuracy score divided by accuracy score for R=1 <> + redundancy null) of views, early fusion predictions (concatenate views + predictions and late fusion predictions (majority views predictions) with + R_list as index for the indicated classifier per key + Creates and saves (at the indicated path path_graph) a graph per value of + n_views_list represented accuracy score divided by accuracy score for R=1 + vs R_list + + Parameters: + ----------- + n_views_list : list + each element from n_views_list defines a new dataset with + element as n_views + R_list : list + each element from R_list defines a new dataset with element as R + path_graph : str + path to save graphics + cv : int + classifier : str + classifier_dictionary : dict + n_samples, n_classes, Z_factor, n_clusters_per_class, class_sep_factor, + n_informative_divid, d, D, standard_deviation : parameters of generator_multiviews_dataset + + Returns: + -------- + a dictionary with n_views_list as key containing a list of DataFrames + (represented accuracy score divided by accuracy score for R=1 <> redundancy + null) with R_list as index per value of n_views_list + """ + dict_n_views_R_ratio = {key:0 for key in n_views_list} + # n_views_list = list of diverse values of n_views from the lowest to the highest + n_views_list.sort(reverse=False) + # same views have same colors on each graph + dict_colors = {'view'+str(i):0 for i in range(n_views_list[-1])} + prop_cycle = plt.rcParams['axes.prop_cycle'] + colors = prop_cycle.by_key()['color'] + for key, c in zip(dict_colors.keys(), colors): + dict_colors[key] = c + dict_colors.update({'early fusion':'purple', 'late fusion':'maroon'}) + + for n_views 
in n_views_list: + # R_list = list of diverse values of R from the lowest to the highest + R_list.sort(reverse=False) + # list of percentage of unsued columns of latent space in views + unsued_dimensions_percent_list = [] + # list of number of informative features of latent space + n_informative_list = [] + # dictionary contains mean of accuracy scores per R + dict_scores_means = {'view'+str(i):[] for i in range(n_views)} + dict_scores_means.update({'early fusion':[], 'late fusion':[]}) + # dictionary of list of scores' mean of view for diverse R divided by + #score's mean of view for R = 1 (<> redundancy null) + dict_scores_ratio_R_1 = {'view'+str(i):0 for i in range(n_views)} + dict_scores_ratio_R_1.update({'early fusion':0, 'late fusion':0}) + # dictionary contains data of each view + dict_views = {'view'+str(i):0 for i in range(n_views)} + + for R in R_list: + Z, y, multiviews_list, unsued_dimensions_percent, n_informative = generator_multiviews_dataset(n_samples, n_views, n_classes, Z_factor, R, n_clusters_per_class, class_sep_factor, n_informative_divid, d, D, standard_deviation) + unsued_dimensions_percent_list.append(unsued_dimensions_percent) + n_informative_list.append(n_informative) + + for i in range(n_views): + # multiviews_list[i] = (columns / data of view i, numeros of columns of view i) + dict_views['view'+str(i)] = multiviews_list[i][0] + + early_fusion = np.concatenate([dict_views[key] for key in dict_views.keys()], axis=1) + # = concatenation of all views + # dictionary of data + dict_data_df = {'view'+str(i):dict_views['view'+str(i)] for i in range(n_views)} + dict_data_df.update({'early fusion':early_fusion}) + + for key in dict_data_df.keys(): + clf = classifier_dictionary[classifier] + score = cross_val_score(clf, dict_data_df[key], y, scoring='accuracy', cv=cv) + dict_scores_means[key].append(score.mean()) + + mean_majority, std_majority = majority_score(dict_views, y, cv, classifier, classifier_dictionary) + dict_scores_means['late 
fusion'].append(mean_majority) + + for key in dict_scores_means.keys(): + score_R_1 = dict_scores_means[key][-1] # R = 1 = last value of + # R_list => last score value in dict_scores_means[key] + dict_scores_ratio_R_1[key] = np.divide(np.array(dict_scores_means[key]), score_R_1) + + df_scores_ratio_R_1 = pd.DataFrame(dict_scores_ratio_R_1, index=R_list) + + plt.figure() + for key in dict_scores_means.keys(): + plt.plot(R_list, dict_scores_ratio_R_1[key], '.-', color=dict_colors[key], label=key) + # index and label for graphic + label_index = [] + R_label = [] + for i in range(0, len(R_list), 4): + R_label.append(R_list[i]) + label_index.append(str(round(R_list[i], 2))+'\n'+str(unsued_dimensions_percent_list[i])+'\n'+str(n_informative_list[i])) + + plt.xticks(R_label, label_index, fontsize='medium', multialignment='center') # new x indexes + plt.xlabel("R\nPercentage of dimensions of latent space unsued in views\nNumber of informative features") + plt.ylabel("Ratio accuracy score / accuracy score for R = 1 for "+classifier) + plt.legend(bbox_to_anchor=(1.04,0.5), loc="center left", borderaxespad=0) + plt.title("number of views = "+str(n_views)+" - number of samples = "+str(n_samples)+"\nfactor of latent space dimension = "+str(Z_factor)+" - number of classes = "+str(n_classes)+"\nRatio accuracy score / accuracy score for R = 1\n(redundancy null) vs R for classifier "+classifier) + plt.savefig(path_graph+"score_R_divid_R_1_"+str(n_views)+"_"+str(n_samples)+"_"+str(Z_factor)+"_"+classifier+".png", bbox_inches='tight') + plt.show() + plt.close() + + dict_n_views_R_ratio[n_views] = df_scores_ratio_R_1 + + plt.figure() + ax = plt.axes(projection="3d") + + for n_views in n_views_list: + for key in dict_n_views_R_ratio[n_views].keys(): + if n_views == n_views_list[-1]: # print legends only once + ax.plot(R_list, dict_n_views_R_ratio[n_views][key], n_views, color=dict_colors[key], label=key) + else: + ax.plot(R_list, dict_n_views_R_ratio[n_views][key], n_views, 
color=dict_colors[key]) + + ax.set_xlabel("R") + ax.set_ylabel("Ratio accuracy score / accuracy score for R = 1 for "+classifier) + ax.set_zlabel("Number of views") + plt.legend(bbox_to_anchor=(1.04,0.5), loc="center left", borderaxespad=0) + plt.title("number of samples = "+str(n_samples)+" - factor of latent space dimension = "+str(Z_factor)+" - number of classes = "+str(n_classes)+"\nRatio accuracy score / accuracy score for R = 1 (redundancy null) vs R, number of views for classifier "+classifier) + plt.savefig(path_graph+"score_R_divid_R_1_all_n_views"+"_"+str(n_samples)+"_"+str(Z_factor)+"_"+classifier+".png", bbox_inches='tight') + plt.show() + plt.close() + + return dict_n_views_R_ratio + + +def score_multiviews_class_sep(class_sep_factor_list, path_graph, cv=10, + classifier="SVM", classifier_dictionary={'SVM':SVC(kernel='linear'), 'NB':GaussianNB()}, + n_views=3, n_samples=1000, n_classes=2, + Z_factor=1, R=2/3, n_clusters_per_class=2, + n_informative_divid=1, d=4, D=10, standard_deviation=2): + """ + Returns 2 DataFrames (first with accuracy score and the second with the + standard deivation of accuracy score) of latent space, views, early fusion + predictions (concatenate views predictions) and late fusion predictions + (majority views predictions) with class_sep_factor_list as index for the + indicated classifier + Creates and saves (at the indicated path path_graph) a graph represented + accuracy score (with confidence interval) vs class_sep_factor_list + + Parameters: + ----------- + class_sep_factor_list : list + each element from n_samples_list defines a new + dataset + path_graph : str + cv : int + classifier : str + classifier_dictionary : dict + n_samples, n_views, n_classes, Z_factor, R, n_clusters_per_class, + n_informative_divid, d, D, standard_deviation : parameters of generator_multiviews_dataset + + Returns: + -------- + 2 DataFrames with n_samples_list as index + """ + # list of percentage of unsued columns of latent space in views + 
unsued_dimensions_percent_list = [] + # list of number of informative features of latent space + n_informative_list = [] + # dictionary contains mean of accuracy scores per class_sep_factor + dict_scores_means = {'latent space':[]} + dict_scores_means.update({'view'+str(i):[] for i in range(n_views)}) + dict_scores_means.update({'early fusion':[], 'late fusion':[]}) + # dictionary contains standard deviation of accuracy scores per class_sep_factor + dict_scores_std = {'latent space':[]} + dict_scores_std.update({'view'+str(i):[] for i in range(n_views)}) + dict_scores_std.update({'early fusion':[], 'late fusion':[]}) + # dictionary contains data of each view + dict_views = {'view'+str(i):0 for i in range(n_views)} + + for class_sep_factor in class_sep_factor_list: + Z, y, multiviews_list, unsued_dimensions_percent, n_informative = generator_multiviews_dataset(n_samples, n_views, n_classes, Z_factor, R, n_clusters_per_class, class_sep_factor, n_informative_divid, d, D, standard_deviation) + unsued_dimensions_percent_list.append(unsued_dimensions_percent) + n_informative_list.append(n_informative) + + for i in range(n_views): + # multiviews_list[i] = (columns / data of view i, numeros of columns of view i) + dict_views['view'+str(i)] = multiviews_list[i][0] + + early_fusion = np.concatenate([dict_views[key] for key in dict_views.keys()], axis=1) + # = concatenation of all views + # dictionary of data + dict_data = {'latent space':Z} + dict_data.update({'view'+str(i):dict_views['view'+str(i)] for i in range(n_views)}) + dict_data.update({'early fusion':early_fusion}) + + for key in dict_data.keys(): + print('key', key) + clf = classifier_dictionary[classifier] + score = cross_val_score(clf, dict_data[key], y, scoring='accuracy', cv=cv) + dict_scores_means[key].append(score.mean()) + dict_scores_std[key].append(score.std()) + + mean_majority, std_majority = majority_score(dict_views, y, cv, classifier, classifier_dictionary) + dict_scores_means['late 
fusion'].append(mean_majority) + dict_scores_std['late fusion'].append(std_majority) + + print(dict_scores_means) + + df_scores_means = pd.DataFrame(dict_scores_means, index=class_sep_factor_list) + df_scores_std = pd.DataFrame(dict_scores_std, index=class_sep_factor_list) + + plt.figure() + for key in dict_scores_means.keys(): + plt.errorbar(class_sep_factor_list, dict_scores_means[key], 1.96*np.array(dict_scores_std[key])/sqrt(cv), label=key) + # index and label for graphic + label_index = [] + for n_samples, percent, n_informative in zip(class_sep_factor_list, unsued_dimensions_percent_list, n_informative_list): + label_index.append(str(n_samples)+'\n'+str(percent)+'\n'+str(n_informative)) + + plt.xticks(class_sep_factor_list, label_index, fontsize='medium', multialignment='center') # new x indexes + plt.xlabel("Factor (class_sep = factor*n_clusters_per_class)\nPercentage of dimensions of latent space unsued in views\nNumber of informative features") + plt.ylabel("Accuracy score for "+classifier) + plt.legend(bbox_to_anchor=(1.04,0.5), loc="center left", borderaxespad=0) + plt.title("number of views = "+str(n_views)+" - R = "+str(round(R, 4))+"\nfactor of latent space dimension = "+str(Z_factor)+" - number of classes = "+str(n_classes)+"\nAccuracy score vs factor of class_sep for classifier "+classifier) + plt.savefig(path_graph+"score_class_sep_"+str(n_views)+"_"+classifier+".png", bbox_inches='tight') + plt.show() + plt.close() + + return df_scores_means, df_scores_std + + +def score_multiviews_n_informative_divided(n_informative_divid_list, path_graph, + cv=10, classifier="SVM", + classifier_dictionary={'SVM':SVC(kernel='linear'), 'NB':GaussianNB()}, + n_views=3, n_samples=1000, + n_classes=2, Z_factor=1, R=2/3, + n_clusters_per_class=2, + class_sep_factor=2, d=4, D=10, + standard_deviation=2): + """ + Returns 2 DataFrames (first with accuracy score and the second with the + standard deivation of accuracy score) of latent space, views, early fusion + 
predictions (concatenate views predictions) and late fusion predictions + (majority views predictions) with n_informative_divid_list as index for the + indicated classifier + Creates and saves (at the indicated path path_graph) a graph represented + accuracy score (with confidence interval) vs n_informative_divid_list + + Parameters: + ----------- + n_informative_divid_list : list + each element from n_informative_divid_list + defines a new dataset with element as + n_informative_divid + path_graph : str + cv : int + classifier : str + classifier_dictionary : dict + n_samples, n_views, n_classes, Z_factor, R, n_clusters_per_class, + class_sep_factor, d, D, standard_deviation : parameters of generator_multiviews_dataset + + Returns: + -------- + 2 DataFrames with n_samples_list as index + """ + # list of percentage of unsued columns of latent space in views + unsued_dimensions_percent_list = [] + # list of number of informative features of latent space + n_informative_list = [] + # dictionary contains mean of accuracy scores per n_informative_divid + dict_scores_means = {'latent space':[]} + dict_scores_means.update({'view'+str(i):[] for i in range(n_views)}) + dict_scores_means.update({'early fusion':[], 'late fusion':[]}) + # dictionary contains standard deviation of accuracy scores per + #n_informative_divid + dict_scores_std = {'latent space':[]} + dict_scores_std.update({'view'+str(i):[] for i in range(n_views)}) + dict_scores_std.update({'early fusion':[], 'late fusion':[]}) + # dictionary contains data of each view + dict_views = {'view'+str(i):0 for i in range(n_views)} + + for n_informative_divid in n_informative_divid_list: + Z, y, multiviews_list, unsued_dimensions_percent, n_informative = generator_multiviews_dataset(n_samples, n_views, n_classes, Z_factor, R, n_clusters_per_class, class_sep_factor, n_informative_divid, d, D, standard_deviation) + unsued_dimensions_percent_list.append(unsued_dimensions_percent) + n_informative_list.append(n_informative) 
+ + for i in range(n_views): + # multiviews_list[i] = (columns / data of view i, numeros of columns of view i) + dict_views['view'+str(i)] = multiviews_list[i][0] + + early_fusion = np.concatenate([dict_views[key] for key in dict_views.keys()], axis=1) + # = concatenation of all views + # dictionary of data + dict_data = {'latent space':Z} + dict_data.update({'view'+str(i):dict_views['view'+str(i)] for i in range(n_views)}) + dict_data.update({'early fusion':early_fusion}) + + for key in dict_data.keys(): + clf = classifier_dictionary[classifier] + score = cross_val_score(clf, dict_data[key], y, scoring='accuracy', cv=cv) + dict_scores_means[key].append(score.mean()) + dict_scores_std[key].append(score.std()) + + mean_majority, std_majority = majority_score(dict_views, y, cv, classifier, classifier_dictionary) + dict_scores_means['late fusion'].append(mean_majority) + dict_scores_std['late fusion'].append(std_majority) + + df_scores_means = pd.DataFrame(dict_scores_means, index=n_informative_divid_list) + df_scores_std = pd.DataFrame(dict_scores_std, index=n_informative_divid_list) + + plt.figure() + for key in dict_scores_means.keys(): + plt.errorbar(n_informative_divid_list, dict_scores_means[key], 1.96*np.array(dict_scores_std[key])/sqrt(cv), label=key) + # index and label for graphic + label_index = [] + for n_informative_divid, percent, n_informative in zip(n_informative_divid_list, unsued_dimensions_percent_list, n_informative_list): + label_index.append(str(n_informative_divid)+'\n'+str(percent)+'\n'+str(n_informative)) + + plt.xticks(n_informative_divid_list, label_index, fontsize='medium', multialignment='center') # new x indexes + plt.xlabel("Factor (n_informative = dimension of latent space / factor)\nPercentage of dimensions of latent space unsued in views\nNumber of informative features") + plt.ylabel("Accuracy score for "+classifier) + plt.legend(bbox_to_anchor=(1.04,0.5), loc="center left", borderaxespad=0) + plt.title("number of views = 
"+str(n_views)+" - R = "+str(round(R, 4))+"\nfactor of latent space dimension = "+str(Z_factor)+" - number of classes = "+str(n_classes)+"\nAccuracy score vs n_informative_divid for classifier "+classifier) + plt.savefig(path_graph+"score_n_informative_"+str(n_views)+"_"+classifier+".png", bbox_inches='tight') + plt.show() + plt.close() + + return df_scores_means, df_scores_std -- GitLab