From 745dd4fd4e0d157dcc36f40c554f5ec075717d8c Mon Sep 17 00:00:00 2001
From: Dominique BENIELLI <dominique.benielli@univ-amu.fr>
Date: Tue, 24 Sep 2019 19:10:17 +0200
Subject: [PATCH] doc ip

---
 README.md                                     |  34 +-
 data/Plausible.hdf5                           | Bin 0 -> 42192 bytes
 data/Plausible0.hdf5                          | Bin 0 -> 9348 bytes
 data/Plausible1.hdf5                          | Bin 0 -> 9348 bytes
 docs/source/analyzeresult.rst                 |   4 +-
 docs/source/api.rst                           |   4 +-
 docs/source/conf.py                           |  41 +-
 docs/source/execution.rst                     |   2 +-
 docs/source/monomulti/exec_classif.rst        |   4 +-
 docs/source/monomulti/metrics.rst             |   4 +-
 .../monomulti/monoview_classifier.ipynb       | 100 ----
 .../monomulti/multiview_classifier.ipynb      | 551 ------------------
 .../multiview_classifiers/classifiers.rst     |   8 +
 .../diversity_fusion.rst                      |   4 +-
 docs/source/monomulti/utils/multiclass.rst    |   4 +-
 docs/source/monomultidoc.rst                  |   4 +-
 docs/source/readme_link.rst                   |   2 +-
 multiview_platform/execute.py                 |   4 +-
 .../monoview/exec_classif_mono_view.py        |   2 +-
 .../monoview/export_results.py                |   6 +-
 .../monoview/monoview_utils.py                |   4 +-
 .../utils/execution.py                        |   2 +-
 .../utils/get_multiview_db.py                 | 100 ++--
 multiview_platform/tests/test_ExecClassif.py  |   2 +-
 .../tests/tmp_tests/test_file.hdf5            | Bin 0 -> 2072 bytes
 setup.py                                      |   4 +-
 26 files changed, 128 insertions(+), 762 deletions(-)
 create mode 100644 data/Plausible.hdf5
 create mode 100644 data/Plausible0.hdf5
 create mode 100644 data/Plausible1.hdf5
 delete mode 100644 docs/source/monomulti/monoview_classifier.ipynb
 delete mode 100644 docs/source/monomulti/multiview_classifier.ipynb
 create mode 100644 docs/source/monomulti/multiview_classifiers/classifiers.rst
 create mode 100644 multiview_platform/tests/tmp_tests/test_file.hdf5

diff --git a/README.md b/README.md
index b68b72a0..55028c1a 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,5 @@
 [](http://www.gnu.org/licenses/gpl-3.0)
-[](https://travis-ci.com/babau1/multiview-machine-learning-omis)
+[](https://gitlab.lis-lab.fr/baptiste.bauvin/multiview-machine-learning-omis/badges/develop/build.svg)
 
 # Mono- and Multi-view classification benchmark
 This project aims to be an easy-to-use solution to run a prior benchmark on a dataset and evaluate mono- & multi-view algorithms capacity to classify it correctly.
@@ -21,30 +21,36 @@ And the following python modules :
 * [h5py](https://www.h5py.org) - Used to generate HDF5 datasets on hard drive and use them to spare RAM
 * [pickle](https://docs.python.org/3/library/pickle.html) - Used to store some results
 * ([graphviz](https://pypi.python.org/pypi/graphviz) - Used for decision tree interpretation)
-* [pandas]
+* [pandas](https://pandas.pydata.org/)
 
-They are all tested in `multiview-machine-mearning-omis/multiview_platform/MonoMutliViewClassifiers/Versions.py` which is automatically checked each time you run the `Exec` script
+They are all tested in `multiview-machine-learning-omis/multiview_platform/MonoMultiViewClassifiers/Versions.py`, which is automatically checked each time you run the `execute` script
 
 ### Installing
 
-No installation is needed, just the prerequisites.
+cd into the project directory
+and install the project :
+
+```
+cd multiview-machine-learning-omis
+pip install -e .
+``` ### Running on simulated data In order to run it you'll need to try on **simulated** data with the command ``` cd multiview-machine-learning-omis/multiview_platform -python Exec.py -log +python execute.py -log ``` -Results will be stored in `multiview-machine-learning-omis/multiview_platform/MonoMultiViewClassifiers/Results/` +Results will be stored in `multiview-machine-learning-omis/multiview_platform/mono_multi_view_classifiers/results/` If you want to run a multiclass (one versus one) benchmark on simulated data, use : ``` cd multiview-machine-learning-omis/multiview_platform -python Exec.py -log --CL_nbClass 3 +python execute.py -log --CL_nbClass 3 ``` -If no path is specified, simulated hdf5 datasets are stored in `multiview-machine-learning-omis/Data` +If no path is specified, simulated hdf5 datasets are stored in `multiview-machine-learning-omis/data` ### Discovering the arguments @@ -52,7 +58,7 @@ If no path is specified, simulated hdf5 datasets are stored in `multiview-machin In order to see all the arguments of this script, their description and default values run : ``` cd multiview-machine-learning-omis/multiview_platform -python Exec.py -h +python execute.py -h ``` The arguments can be passed through a file using `python Exec.py @<path_to_doc>` The file must be formatted with one newline instead of each space : @@ -67,14 +73,14 @@ SVM ``` Moreover, for Monoview algorithms (Multiview is still WIP), it is possible to pass multiple arguments instead of just one. -Thus, executing `python Exec.py --RF_trees 10 100 --RF_max_depth 3 4 --RF_criterion entropy` will result in the generation of several classifiers called +Thus, executing `python execute.py --RF_trees 10 100 --RF_max_depth 3 4 --RF_criterion entropy` will result in the generation of several classifiers called `RandomForest_10_3_entropy`, with 10 trees and a max depth of 3, `RandomForest_10_4_entropy`, with 10 tress and a max depth of 4, `RandomForest_100_3_entropy`, `RandomForest_100_4_entropy` to test all the passed arguments combinations. -### Understanding `Results/` architecture +### Understanding `results/` architecture -Results are stored in `multiview-machine-learning-omis/multiview_platform/MonoMultiViewClassifiers/Results/` +Results are stored in `multiview-machine-learning-omis/multiview_platform/mono_multi_view_classifiers/results/` A directory will be created with the name of the database used to run the script. For each time the script is run, a new directory named after the running date and time will be created. In that directory: @@ -82,7 +88,7 @@ In that directory: * If it is run with one iteration, the iteration results will be stored in the current directory The results for each iteration are graphs plotting the classifiers scores and the classifiers config and results are stored in a directory of their own. 
-To explore the results run the `Exec` script and go in `multiview-machine-learning-omis/multiview_platform/MonoMultiViewClassifiers/Results/Plausible/`
+To explore the results, run the `execute` script and go to `multiview-machine-learning-omis/multiview_platform/mono_multi_view_classifiers/results/plausible/`
 
 ### Dataset compatibility
 
@@ -118,7 +124,7 @@ One group for the additional data called `Metadata` containing at least 3 attrib
 In order to run the script on your dataset you need to use :
 ```
 cd multiview-machine-learning-omis/multiview_platform
-python Exec.py -log --name <your_dataset_name> --type <.cvs_or_.hdf5> --pathF <path_to_your_dataset>
+python execute.py -log --name <your_dataset_name> --type <.csv_or_.hdf5> --pathF <path_to_your_dataset>
 ```
 
 This will run a full benchmark on your dataset using all available views and labels.
diff --git a/data/Plausible.hdf5 b/data/Plausible.hdf5
new file mode 100644
index 0000000000000000000000000000000000000000..947a30ea7b8b7213f075b8c03149c4efd0e5df28
Binary files /dev/null and b/data/Plausible.hdf5 differ
z3icsPA%uhE6R>JftU*+O41vl*m>_wOG{U9GERZZzIn-oiSvVV_44DKO3FU*CNDhZ4 zP>?K&DBLuNC|Dav5gdcWL7L$TK{6m;fR%z|A$|l2LtP7^z!Fd~uwNho5E3R1;(?6; z=>S!^a7*9}kSI(!j0R}~u|d%VR|sc-WFY22Szzm7Y?xjcA50?z!7P}Qz;ejB7Ayc& z0%l{e2P_Sigc}19f{;)dG>r&Rumo5s+(XC=utF3Mfkog}L3v;bt_*A(*kFh_!axWg zq!6YCCIZTDFfGV(Fg7|3G6Sp^CIRMv2#^2>gJKe9Ge{hQK}ieD0x1DuxIrLMn0llx z2Po-*q(F{?OQR$~kSxd~5QecqG)M=ChKNHfhOiN$AQs3ltl}t61FM7+aF>8uP!OZB z`4=^wAiCkIL0qW4*i$yLC8O-o5Eu=C(GVC7fzc2c4S}H$0uA;I45N573`WzyXc`zz O1EXnRGz|>pGynhqE>SN4 literal 0 HcmV?d00001 diff --git a/data/Plausible0.hdf5 b/data/Plausible0.hdf5 new file mode 100644 index 0000000000000000000000000000000000000000..c7e0dd9d3a42182c5879b66d3ac225656171d2e0 GIT binary patch literal 9348 zcmeD5aB<`1lHy_j0S*oZ76t(j3y%LoL5m7R2+I8r;W02IKpBisx&unDV1h6h89<PM zK?1^M5QLhKt}Z0V)s=yPkpX5tjD~7suwa3(3?S4fIl@A~!__eW<mCtu!N9;^0i{ns z(;1Y@;FFk?np4cc;G0^Kn37nM2on#>Of3hc9T08Ez`y{ajTk^F4x&N<LSc}M4D1Y` zw9Nq`!0I6>nUN79paj(cqnQ~b!0H7R<QX7IR)ImBgF(C=s*aU`11!(P!~`~g1DZ3K z85uYrN?<ty!V`iDgB0)Jsfy)cZ~&Xj$iT`V!ob0hmzbLhQYp#E2+moc+zV36fTRy3 z&&I&O&;hjpB+tWO0X2t>0ptddImHYN48n{I49p;ZLj4B|UKs6{&k&hf%)k)r=kEe? z1Jp<c1&}6?ML00XU0^9mFbNI=5UYU|L^3dRK!TM477sU|>L9Y1qy|)oLmJA!L4o2N zBxNB3A_pIalzR*~%e}$p;!zikh5&j9G}tpRP=?)|{e3{?61Wfpl|N7`K;;&Q2`krO z<q$VW5L%A=mF6a;7C~}4tQf+S2bKB^;4%-S6;mEm8-QdWxPl)-kCLMyFd71*Aut*O zqaiRF0;3@?8UmvsFd71*Aut*OqaiRF0@Mou^zmKTxZ(=vxFF0X@E8FD0|R(47$!&o z&B(w49*aByH3K$3fj%zD!(ad&I{=Lb^MJ-(lRzWL3=9Gsu#r{=un92t!NwsF`ZyRM z`kZqTi;HpUgN<(>#%KA!`aomZ#i=Desd?!o84S4Xg^fdl+zE1v0*C;Qr$9zb86m^Y z3@~+&i5~_GJ~M*=I9x#Ewh$vAF@k&ix&u0N8v;=d4h;qdYK>om#%o~X*Pt-sVeo** zhZ+M1LvcZ3QE@5*g9H;~Odk|45QlIuxH<X+yD~5^gan0xD6j&E9MqBE@PUl;(=WX4 zC_-$feRzS=;?NH-17&)J7iivw+TjIp`zSdY0;3^7?GRuDO~!%<^g^9Xi@-*K32X+y zEd#4yWMqKSPzuZi6QG3;$cn*&U;;X~3NamG08|`e2t)!(LWH0siZZAeoB}PifS3jm zf*L?I3T!Ia1*98^2ncL?!6u^yCCDBIR3T8XK~;h&gnt=8sxcIU6d`N_@j#&iQ3~dP z2{0c_fVs$N2&5cgDp(D|Jp3#qZFm(z>;#c`m60I{aXiEwV5ftk0AUWuN~jW0FoRWq m36KOdJP}NATM(uhBn-w#%D{XO0XGB6LGYn0P-BY`MJE7vudLJn literal 0 HcmV?d00001 diff --git a/data/Plausible1.hdf5 b/data/Plausible1.hdf5 new file mode 100644 index 0000000000000000000000000000000000000000..c7e0dd9d3a42182c5879b66d3ac225656171d2e0 GIT binary patch literal 9348 zcmeD5aB<`1lHy_j0S*oZ76t(j3y%LoL5m7R2+I8r;W02IKpBisx&unDV1h6h89<PM zK?1^M5QLhKt}Z0V)s=yPkpX5tjD~7suwa3(3?S4fIl@A~!__eW<mCtu!N9;^0i{ns z(;1Y@;FFk?np4cc;G0^Kn37nM2on#>Of3hc9T08Ez`y{ajTk^F4x&N<LSc}M4D1Y` zw9Nq`!0I6>nUN79paj(cqnQ~b!0H7R<QX7IR)ImBgF(C=s*aU`11!(P!~`~g1DZ3K z85uYrN?<ty!V`iDgB0)Jsfy)cZ~&Xj$iT`V!ob0hmzbLhQYp#E2+moc+zV36fTRy3 z&&I&O&;hjpB+tWO0X2t>0ptddImHYN48n{I49p;ZLj4B|UKs6{&k&hf%)k)r=kEe? 
z1Jp<c1&}6?ML00XU0^9mFbNI=5UYU|L^3dRK!TM477sU|>L9Y1qy|)oLmJA!L4o2N zBxNB3A_pIalzR*~%e}$p;!zikh5&j9G}tpRP=?)|{e3{?61Wfpl|N7`K;;&Q2`krO z<q$VW5L%A=mF6a;7C~}4tQf+S2bKB^;4%-S6;mEm8-QdWxPl)-kCLMyFd71*Aut*O zqaiRF0;3@?8UmvsFd71*Aut*OqaiRF0@Mou^zmKTxZ(=vxFF0X@E8FD0|R(47$!&o z&B(w49*aByH3K$3fj%zD!(ad&I{=Lb^MJ-(lRzWL3=9Gsu#r{=un92t!NwsF`ZyRM z`kZqTi;HpUgN<(>#%KA!`aomZ#i=Desd?!o84S4Xg^fdl+zE1v0*C;Qr$9zb86m^Y z3@~+&i5~_GJ~M*=I9x#Ewh$vAF@k&ix&u0N8v;=d4h;qdYK>om#%o~X*Pt-sVeo** zhZ+M1LvcZ3QE@5*g9H;~Odk|45QlIuxH<X+yD~5^gan0xD6j&E9MqBE@PUl;(=WX4 zC_-$feRzS=;?NH-17&)J7iivw+TjIp`zSdY0;3^7?GRuDO~!%<^g^9Xi@-*K32X+y zEd#4yWMqKSPzuZi6QG3;$cn*&U;;X~3NamG08|`e2t)!(LWH0siZZAeoB}PifS3jm zf*L?I3T!Ia1*98^2ncL?!6u^yCCDBIR3T8XK~;h&gnt=8sxcIU6d`N_@j#&iQ3~dP z2{0c_fVs$N2&5cgDp(D|Jp3#qZFm(z>;#c`m60I{aXiEwV5ftk0AUWuN~jW0FoRWq m36KOdJP}NATM(uhBn-w#%D{XO0XGB6LGYn0P-BY`MJE7vudLJn literal 0 HcmV?d00001 diff --git a/docs/source/analyzeresult.rst b/docs/source/analyzeresult.rst index 2367d0d6..1477e2ef 100644 --- a/docs/source/analyzeresult.rst +++ b/docs/source/analyzeresult.rst @@ -1,5 +1,5 @@ Result analysis module ====================== -.. automodule:: multiview_platform.MonoMultiViewClassifiers.ResultAnalysis - :members: \ No newline at end of file +.. automodule:: multiview_platform.mono_multi_view_classifiers.result_analysis + :members: diff --git a/docs/source/api.rst b/docs/source/api.rst index d5bc51ec..367a94fb 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -2,9 +2,9 @@ Multiview Platform ================== .. toctree:: - :maxdepth: 1 + :maxdepth: 3 :caption: Contents: execution monomultidoc - analyzeresult \ No newline at end of file + analyzeresult diff --git a/docs/source/conf.py b/docs/source/conf.py index f3f304a1..d3f13a6c 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -18,11 +18,12 @@ # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. # -# import os -# import sys -# sys.path.insert(0, os.path.abspath('.')) - +import os +import sys +sys.path.insert(0, os.path.abspath('.')) +sys.path.insert(0, os.path.abspath('../../multiview_platform')) +sys.path.insert(0, os.path.abspath('../..')) # -- General configuration ------------------------------------------------ # If your documentation needs a minimal Sphinx version, state it here. @@ -37,17 +38,18 @@ add_module_names = False # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = ['sphinx.ext.autodoc', - 'sphinx.ext.doctest', - 'sphinx.ext.intersphinx', - 'sphinx.ext.todo', - 'nbsphinx', - 'sphinx.ext.coverage', - 'sphinx.ext.mathjax', - 'sphinx.ext.ifconfig', - 'sphinx.ext.viewcode', - 'sphinx.ext.githubpages', - 'sphinx.ext.napoleon', - 'm2r',] +# 'sphinx.ext.doctest', +# 'sphinx.ext.intersphinx', +# 'sphinx.ext.todo', +# 'nbsphinx', + 'sphinx.ext.coverage', + 'sphinx.ext.imgmath', +# 'sphinx.ext.mathjax', +# 'sphinx.ext.ifconfig', +# 'sphinx.ext.viewcode', +# 'sphinx.ext.githubpages', + 'sphinx.ext.napoleon', + 'm2r',] # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] @@ -55,12 +57,12 @@ templates_path = ['_templates'] # The suffix(es) of source filenames. 
# You can specify multiple suffix as a list of string: # -source_suffix = {'.rst': 'restructuredtext', '.md': 'markdown'} +# source_suffix = {'.rst': 'restructuredtext', '.md': 'markdown'} # source_suffix = '.rst' -# source_suffix = ['.rst', '.md'] +source_suffix = ['.rst', '.md'] # source_parsers = { -# '.md': CommonMarkParser, +# '.md': CommonMarkParser, # } # The master toctree document. @@ -103,7 +105,8 @@ todo_include_todos = True # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -html_theme = 'sphinx_rtd_theme' +# html_theme = 'sphinx_rtd_theme' +html_theme = 'classic' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the diff --git a/docs/source/execution.rst b/docs/source/execution.rst index 3d26fece..b465e63f 100644 --- a/docs/source/execution.rst +++ b/docs/source/execution.rst @@ -1,6 +1,6 @@ Welcome to the exection documentation ===================================== -.. automodule:: multiview_platform.Exec +.. automodule:: multiview_platform.execute :members: diff --git a/docs/source/monomulti/exec_classif.rst b/docs/source/monomulti/exec_classif.rst index fb379570..31dd4af5 100644 --- a/docs/source/monomulti/exec_classif.rst +++ b/docs/source/monomulti/exec_classif.rst @@ -1,6 +1,6 @@ Classification execution module =============================== -.. automodule:: multiview_platform.MonoMultiViewClassifiers.ExecClassif +.. automodule:: multiview_platform.mono_multi_view_classifiers.exec_classif :members: - :inherited-members: \ No newline at end of file + :inherited-members: diff --git a/docs/source/monomulti/metrics.rst b/docs/source/monomulti/metrics.rst index c42b38c4..310b33ff 100644 --- a/docs/source/monomulti/metrics.rst +++ b/docs/source/monomulti/metrics.rst @@ -1,6 +1,6 @@ Metrics framework ================= -.. automodule:: multiview_platform.MonoMultiViewClassifiers.Metrics.framework +.. automodule:: multiview_platform.mono_multi_view_classifiers.metrics.framework :members: - :inherited-members: \ No newline at end of file + :inherited-members: diff --git a/docs/source/monomulti/monoview_classifier.ipynb b/docs/source/monomulti/monoview_classifier.ipynb deleted file mode 100644 index a7e85bbc..00000000 --- a/docs/source/monomulti/monoview_classifier.ipynb +++ /dev/null @@ -1,100 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Monoview classifier framework" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## File addition" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "* In the `MonoviewClassifiers` package, you need to add a python module called after your monoview classifier (let's call it MOC for **MO**noview **C**lassifier)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## The `MOC.py` file" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In this file, you need to add several functions forthe platform to be able to use your classifier, they are alllisted below : " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### `canProbas`" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This function is just used to knowif the classifier can predict a probability for each label instead of just predicting the a label." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "def canProbas():\n", - " return True" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### `fit`" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This function returns a fitted sklearn classifier object" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 2", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "2.7.13" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/docs/source/monomulti/multiview_classifier.ipynb b/docs/source/monomulti/multiview_classifier.ipynb deleted file mode 100644 index 734b0c79..00000000 --- a/docs/source/monomulti/multiview_classifier.ipynb +++ /dev/null @@ -1,551 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# How to add a multiview classifier to the platform" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## File addition " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "* In the `Code/MonoMultiViewClassifiers/MultiviewClassifiers` package, add a new package named after your multiview classifier (let's call it NMC for New Multiview Classifier).\n", - "\n", - "* In this package (`Code/MonoMultiViewClassifiers/MultiviewClassifiers/NMC`), add a file called `NMCModule.py` and another one called `analyzeResults.py`. These will be the two files used by the platform to communicate with your implementation.\n", - "\n", - "* You can now add either a package named after your classifier `NMCPackage` and paste your files in it or just add a file with the same name if it is enough." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## `NMCModule.py`" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Here we will list all the necessary functions of the python module to allow the platform to use NMC" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### The functions" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### `getArgs`" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This function is used to multiple arguments dictionaries from one benchmark entry. It must return the `argumentsList` to which it must have add at least a dictionary containing all the necessary information to run NMC. 
You must add all general fields about the type of classifier and a field called `NMCKWARGS` (`<classifier_name>KWARGS`) conataining another dictionary with classifier-specific agruments (we assume here that NMC has two hyper-parameters : a set of weights and an integer) " - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "arguments = {\"CL_type\":\"NMC\", \n", - " \"views\":[\"all\", \"the\", \"views\", \"names\"],\n", - " \"NB_VIEW\":len([\"all\", \"the\", \"views\", \"names\"]), \n", - " \"viewsIndices\":[\"the indices\", \"of the\", \"views in\", \"the hdf5 file\"], \n", - " \"NB_CLASS\": \"the number of labels of the dataset\", \n", - " \"LABLELS_NAMES\": [\"the names of\", \"the labels used\"], \n", - " \"NMCKWARGS\":{\"weights\":[], \n", - " \"integer\":42,\n", - " \"nbViews\":5}\n", - " }" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To fill these fields, you can use the default values given in argument of the function : " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "def getArgs(args, benchmark, views, viewsIndices, randomState, directory, resultsMonoview, classificationIndices):\n", - " argumentsList = []\n", - " nbViews = len(views)\n", - " arguments = {\"CL_type\": \"NMC\",\n", - " \"views\": views,\n", - " \"NB_VIEW\": len(views),\n", - " \"viewsIndices\": viewsIndices,\n", - " \"NB_CLASS\": len(args.CL_classes),\n", - " \"LABELS_NAMES\": args.CL_classes,\n", - " \"NMCKWARGS\": {\"weights\":[],\n", - " \"integer\":42,\n", - " \"nbViews\":5}}\n", - " argumentsList.append(arguments)\n", - " return argumentsList" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This function is also used to add the user-defined configuration for the classifier, but we will discuss it later" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### `genName`" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This function is used to generate a short string describing the classifier using its configuration." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "def genName(config):\n", - " return \"NMF\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Some classifiers, like some late fusion classifiers will have more complicated `genName` functions that will need to summurize which monoview classifiers they use in a short string using the `config` argument that is exactly the dictionay called `\"NMCKWARGS\"` in the `getArgs` function" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### `getBenchmark`" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This function is used to generate the `benchmark` argument of `getArgs`. It stores all the different configurations that will have to be tested (does not inculde hyper-parameters sets). For example for the Mumbo classifier, il will store the list of possible algorithms to use as weak leaners. 
" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "def getBenchmark(benchmark, args=None):\n", - " benchmark[\"Multiview\"][\"NMC\"] = [\"Some NMC cnfigurations\"]\n", - " return benchmark" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The `benchmark` argument is pre-generated with an entry for all the multiview classifiers so you just need to fill it with the different configurations" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### `genParamsSets`" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This function is used to generate random hyper-parameters sets to allow a randomized search to estimate the best one. It works in pair with the `setParams` method implemented in the classifier's class so you need to keep in mind the order of the hyper-paramters you used here.\n", - "\n", - "The `classificationKWARGS` argument is the `\"NMCKWARGS\"` entry seen earlier, and it is highly recommended to use the `randomState` object (which is described [here](https://docs.scipy.org/doc/numpy-1.13.0/reference/generated/numpy.random.RandomState.html)) to generate random numbers in order for the results to be reproductible\n", - "\n", - "Assuming our NMC classifier has 2 HP, one weight vector for each view and one integer that can be between 1 and 100, the `genParamsSets` function will look like :" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "def genParamsSets(classificationKWARGS, randomState, nIter=1):\n", - " weightsVector = [randomState.random_sample(classificationKWARGS[\"nbViews\"]) for _ in range(nIter)]\n", - " nomralizedWeights = [weights/np.sum(weights) for weights in weightsVector]\n", - " intsVector = list(randomState.randint(1,100,nIter))\n", - " paramsSets = [[normalizedWeight, integer] for normalizedWeight, interger in zip(normalizedWeights, intsVector)]\n", - " return paramsSets" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### The `NMC` class" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "It has to be named after the classifier adding `Class` at the end of its name. " - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "class NMCClass:\n", - " pass" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### `init` method" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "There is nothing specific to define in the `__init__` method, you just need to initialize the attributes of your classifier. The `kwargs` argument is the `NMCKWARGS` dictionary seen earlier. In our example, NMC uses two hyper parameters : weights and an int." 
- ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "def __init__(self, randomState, NB_CORES=1, **kwargs):\n", - " if kwargs[\"weights\"] == []:\n", - " self.weights = randomState.random_sample(classificationKWARGS[\"nbViews\"])\n", - " else:\n", - " self.weights = kwargs[\"weights\"]\n", - " self.weights /= np.sum(self.weights)\n", - " self.integer = kwargs[\"integer\"]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### `setParams` method" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This method is used to tune your classifier with a set of hyper parameters. The set is a list ordered as in the `genParamsSets` function seen earlier. The input of the `setParams` method is a list of parameters in the right order. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "def setParams(self, paramsSet):\n", - " self.weights = paramsSet[0]\n", - " self.integer = paramsSet[1]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### `fit_hdf5` method" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This method is generaly the same as `sklearn`'s `fit` method but uses as an input an HDF5 dataset in order to lower the memory usage of the whole platform.\n", - "* The `DATASET` object is an HDF5 dataset file containing all the views and labels. \n", - "* The `usedIndices` object is a `numpy` 1d-array containing the indices of the examples want to learn from. \n", - "* The `viewsIndices` object is a `numpy` 1d-array containing the indices of the views we want to learn from. " - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "def fit_hdf5(self, DATASET, usedIndices=None, viewsIndices=None):\n", - " # Call the fit function of your own module\n", - " pass" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### `predict_hdf5` method" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This method is used as an HDF5-compatible method similar to `sklearn`'s `predict` method. It has the same input than the `fit_hdf5` method but returns a 1d-array containing the labels of the asked examples (ordered as in `usedIndices`)." - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [], - "source": [ - "def predict_hdf5(self, DATASET, usedIndices=None, viewsIndices=None):\n", - " # Call the predict function of your own module\n", - " predictedLabels = None # Just to avoid any ipynb running error\n", - " return predictedLabels" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Once you've added everything to the `NMCModule.py` file you are close to be able to run your algorithm on the platform, you just need to fill the `analyzeResults.py` file." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## `analyzeResults.py`" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The `analyzeResults.py` file is a module used to get a specific result analysis for your classifier. 
You have, in order to run the platform, to add aunique function called `execute` that will run the analysis and return three different variables : \n", - "* `stringAnalysis` is a string that will be saved in a file to describe the classifier, its performance and may give some insights on the interpretation of it's way to classify. \n", - "* `imagesAnalysis` is a dictionary where you can store images (as values) to describe the classifier & co., the keys will be the images names. \n", - "* `metricsScores` is a dictionary where the values are lists containing train and test scores, and the keys are the metrics names. ( `metricsScores = {\"accuracy_score\":[0.99, 0.10]}` )\n", - "The `execute` function has as inputs : \n", - "* `classifier` is a classifier object from your classifiers class\n", - "* `trainLabels` are the labels predicted for the train set by the classifier\n", - "* `testLabels` are the labels predicted for the test set by the classifier\n", - "* `DATASET` is the HDF5 dataset object\n", - "* `classificationKWARGS` is the dictionary named `NMCKWARGS` earlier\n", - "* `classificationIndices` is a triplet containing the learning indices, the validation indices and the testIndices for multiclass classification\n", - "* `LABELS_DICTIONARY` is a dictionary containing a label as a key and it's name as a value\n", - "* `views` is the list of the views names used by the classifier\n", - "* `nbCores` is an `int` fixing the number of threads used by the platform \n", - "* `times` is a tuple containing the extraction time and the classification time\n", - "* `name` is the name ofthe database on which the plartform is running\n", - "* `KFolds` is an `sklearn` kfold object used for the cross-validation\n", - "* `hyperParamSearch` is the type of the hyper parameters optimization method\n", - "* `nIter` is the number of iterations of the hyper parameters method\n", - "* `metrics` is the list of the metrics and their arguments\n", - "* `viewsIndices` is 1d-array of the indices of the views used for classification\n", - "* `randomState` is a `numpy` RandomState object\n", - "* `labels` are the groud truth labels of the dataset" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The basic function analyzing results for all the classifiers looks like : " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "from ... 
import Metrics\n", - "from ...utils.MultiviewResultAnalysis import printMetricScore, getMetricsScores\n", - "\n", - "def execute(classifier, trainLabels,\n", - " testLabels, DATASET,\n", - " classificationKWARGS, classificationIndices,\n", - " LABELS_DICTIONARY, views, nbCores, times,\n", - " name, KFolds,\n", - " hyperParamSearch, nIter, metrics,\n", - " viewsIndices, randomState, labels):\n", - " CLASS_LABELS = labels\n", - " learningIndices, validationIndices, testIndicesMulticlass = classificationIndices\n", - "\n", - " metricModule = getattr(Metrics, metrics[0][0])\n", - " if metrics[0][1] is not None:\n", - " metricKWARGS = dict((index, metricConfig) for index, metricConfig in enumerate(metrics[0][1]))\n", - " else:\n", - " metricKWARGS = {}\n", - " scoreOnTrain = metricModule.score(CLASS_LABELS[learningIndices], CLASS_LABELS[learningIndices], **metricKWARGS)\n", - " scoreOnTest = metricModule.score(CLASS_LABELS[validationIndices], testLabels, **metricKWARGS)\n", - "\n", - " # To be modified to fit to your classifier \n", - " classifierConfigurationString = \"with weights : \"+ \", \".join(map(str, list(classifier.weights))) + \", and integer : \"+str(classifier.integer)\n", - " # Modify the name of the classifier in these strings\n", - " stringAnalysis = \"\\t\\tResult for Multiview classification with NMC \"+ \\\n", - " \"\\n\\n\" + metrics[0][0] + \" :\\n\\t-On Train : \" + str(scoreOnTrain) + \"\\n\\t-On Test : \" + str(\n", - " scoreOnTest) + \\\n", - " \"\\n\\nDataset info :\\n\\t-Database name : \" + name + \"\\n\\t-Labels : \" + \\\n", - " ', '.join(LABELS_DICTIONARY.values()) + \"\\n\\t-Views : \" + ', '.join(views) + \"\\n\\t-\" + str(\n", - " KFolds.n_splits) + \\\n", - " \" folds\\n\\nClassification configuration : \\n\\t-Algorithm used : NMC \" + classifierConfigurationString\n", - "\n", - " metricsScores = getMetricsScores(metrics, trainLabels, testLabels,\n", - " validationIndices, learningIndices, labels)\n", - " stringAnalysis += printMetricScore(metricsScores, metrics)\n", - "\n", - " imagesAnalysis = {}\n", - " return stringAnalysis, imagesAnalysis, metricsScores" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Once you have done this, your classifier is ready to be used by the platform, but you can add some description about your classifier in the analyzeResults file. " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Adding arguments to avoid hyper parameter optimization" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In order to be able to test a specific set of arguments on this platform, you need to add some lines in the argument parser located in the file `Code/MonoMultiViewClassifiers/utils/execution.py` in the `parseTheArgs` function. 
What you need to do is to add a group of arguments, allowing you to pass the hyper parameters in the command line :" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "groupNMC = parser.add_argument_group('New Multiview Classifier arguments')\n", - "groupNMC.add_argument('--NMC_weights', metavar='FLOAT', action='store', nargs=\"+\",\n", - " help='Determine the weights of NMC', type=float,\n", - " default=[])\n", - "groupNMC.add_argument('--NMC_integer', metavar='INT', action='store',\n", - " help='Determine the integer of NMC', type=int,\n", - " default=42)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In order for the platform to use these arguments, you need to modify the `getArgs` function of the file `NMCModule.py`. \n" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "def getArgs(args, benchmark, views, viewsIndices, randomState, directory, resultsMonoview, classificationIndices):\n", - " argumentsList = []\n", - " nbViews = len(views)\n", - " arguments = {\"CL_type\": \"NMC\",\n", - " \"views\": views,\n", - " \"NB_VIEW\": len(views),\n", - " \"viewsIndices\": viewsIndices,\n", - " \"NB_CLASS\": len(args.CL_classes),\n", - " \"LABELS_NAMES\": args.CL_classes,\n", - " \"NMCKWARGS\": {\"weights\":args.NMC_weights, # Modified to take the args into account\n", - " \"integer\":args.NMC_integer, # Modified to take the args into account\n", - " \"nbViews\":5}}\n", - " argumentsList.append(arguments)\n", - " return argumentsList" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 2", - "language": "python3", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2.0 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "2.7.13" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/docs/source/monomulti/multiview_classifiers/classifiers.rst b/docs/source/monomulti/multiview_classifiers/classifiers.rst new file mode 100644 index 00000000..0ca3191d --- /dev/null +++ b/docs/source/monomulti/multiview_classifiers/classifiers.rst @@ -0,0 +1,8 @@ +Classifiers +=========== + +.. autosummary:: + :toctree: DIRNAME + + multiview_platform.mono_multi_view_classifiers.monoview_classifiers + diff --git a/docs/source/monomulti/multiview_classifiers/diversity_fusion.rst b/docs/source/monomulti/multiview_classifiers/diversity_fusion.rst index 6d8e675c..a60545a2 100644 --- a/docs/source/monomulti/multiview_classifiers/diversity_fusion.rst +++ b/docs/source/monomulti/multiview_classifiers/diversity_fusion.rst @@ -1,5 +1,5 @@ Diversity Fusion Classifiers ============================ -.. automodule:: multiview_platform.MonoMultiViewClassifiers.Multiview.Additions.diversity_utils -:members: +.. automodule:: multiview_platform.mono_multi_view_classifiers.multiview.additions.diversity_utils + :members: diff --git a/docs/source/monomulti/utils/multiclass.rst b/docs/source/monomulti/utils/multiclass.rst index cd863152..9f79bc8d 100644 --- a/docs/source/monomulti/utils/multiclass.rst +++ b/docs/source/monomulti/utils/multiclass.rst @@ -1,6 +1,6 @@ Utils Multiclass module ======================= -.. automodule:: multiview_platform.MonoMultiViewClassifiers.utils.Multiclass +.. 
automodule:: multiview_platform.mono_multi_view_classifiers.utils.multiclass :members: - :inherited-members: \ No newline at end of file + :inherited-members: diff --git a/docs/source/monomultidoc.rst b/docs/source/monomultidoc.rst index b25fd849..4ada7eec 100644 --- a/docs/source/monomultidoc.rst +++ b/docs/source/monomultidoc.rst @@ -2,11 +2,11 @@ Mono and mutliview classification ================================= .. toctree:: - :maxdepth: 1 + :maxdepth: 3 :caption: Contents: monomulti/metrics - monomulti/monoview_classifier + monomulti/monoview_classifier/classifiers monomulti/multiview_classifier monomulti/exec_classif monomulti/multiview_classifiers/diversity_fusion diff --git a/docs/source/readme_link.rst b/docs/source/readme_link.rst index c27a295c..b80fd35d 100644 --- a/docs/source/readme_link.rst +++ b/docs/source/readme_link.rst @@ -1,4 +1,4 @@ Readme ====== -.. mdinclude:: ../../README.md \ No newline at end of file +.. mdinclude:: ../../README.md diff --git a/multiview_platform/execute.py b/multiview_platform/execute.py index 3dcb05c6..53d4fcc9 100644 --- a/multiview_platform/execute.py +++ b/multiview_platform/execute.py @@ -1,7 +1,7 @@ """This is the execution module, used to execute the code""" -def Exec(): +def exec(): import versions versions.testVersions() import sys @@ -11,4 +11,4 @@ def Exec(): if __name__ == "__main__": - Exec() + exec() diff --git a/multiview_platform/mono_multi_view_classifiers/monoview/exec_classif_mono_view.py b/multiview_platform/mono_multi_view_classifiers/monoview/exec_classif_mono_view.py index d228a56b..1870e9b7 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview/exec_classif_mono_view.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview/exec_classif_mono_view.py @@ -255,7 +255,7 @@ def saveResults(stringAnalysis, outputFileName, full_labels_pred, y_train_pred, # help='Name of the view used', default='View0') # groupStandard.add_argument('--pathF', metavar='STRING', action='store', # help='Path to the database hdf5 file', -# default='../../../Data/Plausible') +# default='../../../data/Plausible') # groupStandard.add_argument('--directory', metavar='STRING', action='store', # help='Path of the output directory', default='') # groupStandard.add_argument('--labelsNames', metavar='STRING', diff --git a/multiview_platform/mono_multi_view_classifiers/monoview/export_results.py b/multiview_platform/mono_multi_view_classifiers/monoview/export_results.py index b2d969b3..ad9a7f3c 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview/export_results.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview/export_results.py @@ -14,7 +14,7 @@ import pandas as pd # for Series and DataFrames from matplotlib.offsetbox import AnchoredOffsetbox, TextArea, \ HPacker # to generate the Annotations in plot from pylab import rcParams # to change size of plot -from scipy.interpolate import interp1d # to Interpolate Data +from scipy.interpolate import interp1d # to Interpolate data from sklearn import metrics # For stastics on classification # Import own modules @@ -122,8 +122,8 @@ def showScoreTime(directory, filename, store, resScore, resTime, rangeX, ax1.add_artist(anchored_box) fig.subplots_adjust(top=0.7) - ax1.legend(['Score Data', 'Score Interpolated'], loc='upper left') - ax2.legend(['Time Data', 'Time Interpolated'], loc='lower right') + ax1.legend(['Score data', 'Score Interpolated'], loc='upper left') + ax2.legend(['Time data', 'Time Interpolated'], loc='lower right') plt.title(fig_desc, fontsize=18) diff 
--git a/multiview_platform/mono_multi_view_classifiers/monoview/monoview_utils.py b/multiview_platform/mono_multi_view_classifiers/monoview/monoview_utils.py index 9f75e36f..4f3500d7 100644 --- a/multiview_platform/mono_multi_view_classifiers/monoview/monoview_utils.py +++ b/multiview_platform/mono_multi_view_classifiers/monoview/monoview_utils.py @@ -273,7 +273,7 @@ class MonoviewResult(object): # return trainingExamplesIndices -##### Generating Test and Train Data +##### Generating Test and Train data # def calcTrainTestOwn(X,y,split): # # classLabels = pd.Series(y) @@ -383,7 +383,7 @@ class MonoviewResult(object): # This means the oob method is n_observations/3 times faster to train then the leave-one-out method. # -# X_test: Test Data +# X_test: Test data # y_test: Test Labels # num_estimators: number of trees # def MonoviewClassifRandomForest(X_train, y_train, nbFolds=4, nbCores=1, **kwargs): diff --git a/multiview_platform/mono_multi_view_classifiers/utils/execution.py b/multiview_platform/mono_multi_view_classifiers/utils/execution.py index 5a32172b..da36fb45 100644 --- a/multiview_platform/mono_multi_view_classifiers/utils/execution.py +++ b/multiview_platform/mono_multi_view_classifiers/utils/execution.py @@ -45,7 +45,7 @@ def parseTheArgs(arguments): # groupStandard.add_argument('--pathF', metavar='STRING', action='store', # help='Path to the hdf5 dataset or database ' # 'folder (default: %(default)s)', -# default='../Data/') +# default='../data/') # groupStandard.add_argument('--nice', metavar='INT', action='store', # type=int, # help='Niceness for the processes', default=0) diff --git a/multiview_platform/mono_multi_view_classifiers/utils/get_multiview_db.py b/multiview_platform/mono_multi_view_classifiers/utils/get_multiview_db.py index 5821bf02..22246c81 100644 --- a/multiview_platform/mono_multi_view_classifiers/utils/get_multiview_db.py +++ b/multiview_platform/mono_multi_view_classifiers/utils/get_multiview_db.py @@ -610,25 +610,25 @@ def getClassicDBcsv(views, pathF, nameDB, NB_CLASS, askedLabelsNames, # def getMultiOmicDBcsv(features, path, name, NB_CLASS, LABELS_NAMES, randomState): # datasetFile = h5py.File(path + "MultiOmic.hdf5", "w") # -# logging.debug("Start:\t Getting Methylation Data") +# logging.debug("Start:\t Getting Methylation data") # methylData = np.genfromtxt(path + "matching_methyl.csv", delimiter=',') # methylDset = datasetFile.create_dataset("View0", methylData.shape) # methylDset[...] = methylData # methylDset.attrs["name"] = "Methyl" # methylDset.attrs["sparse"] = False # methylDset.attrs["binary"] = False -# logging.debug("Done:\t Getting Methylation Data") +# logging.debug("Done:\t Getting Methylation data") # -# logging.debug("Start:\t Getting MiRNA Data") +# logging.debug("Start:\t Getting MiRNA data") # mirnaData = np.genfromtxt(path + "matching_mirna.csv", delimiter=',') # mirnaDset = datasetFile.create_dataset("View1", mirnaData.shape) # mirnaDset[...] 
= mirnaData # mirnaDset.attrs["name"] = "MiRNA_" # mirnaDset.attrs["sparse"] = False # mirnaDset.attrs["binary"] = False -# logging.debug("Done:\t Getting MiRNA Data") +# logging.debug("Done:\t Getting MiRNA data") # -# logging.debug("Start:\t Getting RNASeq Data") +# logging.debug("Start:\t Getting RNASeq data") # rnaseqData = np.genfromtxt(path + "matching_rnaseq.csv", delimiter=',') # uselessRows = [] # for rowIndex, row in enumerate(np.transpose(rnaseqData)): @@ -640,16 +640,16 @@ def getClassicDBcsv(views, pathF, nameDB, NB_CLASS, askedLabelsNames, # rnaseqDset.attrs["name"] = "RNASeq_" # rnaseqDset.attrs["sparse"] = False # rnaseqDset.attrs["binary"] = False -# logging.debug("Done:\t Getting RNASeq Data") +# logging.debug("Done:\t Getting RNASeq data") # -# logging.debug("Start:\t Getting Clinical Data") +# logging.debug("Start:\t Getting Clinical data") # clinical = np.genfromtxt(path + "clinicalMatrix.csv", delimiter=',') # clinicalDset = datasetFile.create_dataset("View3", clinical.shape) # clinicalDset[...] = clinical # clinicalDset.attrs["name"] = "Clinic" # clinicalDset.attrs["sparse"] = False # clinicalDset.attrs["binary"] = False -# logging.debug("Done:\t Getting Clinical Data") +# logging.debug("Done:\t Getting Clinical data") # # labelFile = open(path + 'brca_labels_triple-negatif.csv') # labels = np.array([int(line.strip().split(',')[1]) for line in labelFile]) @@ -849,11 +849,11 @@ def getClassicDBcsv(views, pathF, nameDB, NB_CLASS, askedLabelsNames, # def getKMultiOmicDBcsv(features, path, name, NB_CLASS, LABELS_NAMES): # datasetFile = h5py.File(path + "KMultiOmic.hdf5", "w") # -# # logging.debug("Start:\t Getting Methylation Data") +# # logging.debug("Start:\t Getting Methylation data") # methylData = np.genfromtxt(path + "matching_methyl.csv", delimiter=',') -# logging.debug("Done:\t Getting Methylation Data") +# logging.debug("Done:\t Getting Methylation data") # -# logging.debug("Start:\t Getting Sorted Methyl Data") +# logging.debug("Start:\t Getting Sorted Methyl data") # Methyl = methylData # sortedMethylGeneIndices = np.zeros(methylData.shape, dtype=int) # MethylRanking = np.zeros(methylData.shape, dtype=int) @@ -864,9 +864,9 @@ def getClassicDBcsv(views, pathF, nameDB, NB_CLASS, askedLabelsNames, # sortedMethylGeneIndices[exampleIndex] = sortedMethylIndicesArray # for geneIndex in range(Methyl.shape[1]): # MethylRanking[exampleIndex, sortedMethylIndicesArray[geneIndex]] = geneIndex -# logging.debug("Done:\t Getting Sorted Methyl Data") +# logging.debug("Done:\t Getting Sorted Methyl data") # -# logging.debug("Start:\t Getting Binarized Methyl Data") +# logging.debug("Start:\t Getting Binarized Methyl data") # k = findClosestPowerOfTwo(9) - 1 # try: # factorizedLeftBaseMatrix = np.genfromtxt( @@ -884,9 +884,9 @@ def getClassicDBcsv(views, pathF, nameDB, NB_CLASS, askedLabelsNames, # bMethylDset.attrs["name"] = "BMethyl" + str(k) # bMethylDset.attrs["sparse"] = False # bMethylDset.attrs["binary"] = True -# logging.debug("Done:\t Getting Binarized Methyl Data") +# logging.debug("Done:\t Getting Binarized Methyl data") # -# logging.debug("Start:\t Getting Binned Methyl Data") +# logging.debug("Start:\t Getting Binned Methyl data") # lenBins = 3298 # nbBins = 9 # overlapping = 463 @@ -906,9 +906,9 @@ def getClassicDBcsv(views, pathF, nameDB, NB_CLASS, askedLabelsNames, # binnedMethyl.attrs["name"] = "bMethyl" + str(nbBins) # binnedMethyl.attrs["sparse"] = False # binnedMethyl.attrs["binary"] = True -# logging.debug("Done:\t Getting Binned Methyl Data") +# 
logging.debug("Done:\t Getting Binned Methyl data") # -# logging.debug("Start:\t Getting Binarized Methyl Data") +# logging.debug("Start:\t Getting Binarized Methyl data") # k = findClosestPowerOfTwo(17) - 1 # try: # factorizedLeftBaseMatrix = np.genfromtxt( @@ -926,9 +926,9 @@ def getClassicDBcsv(views, pathF, nameDB, NB_CLASS, askedLabelsNames, # bMethylDset.attrs["name"] = "BMethyl" + str(k) # bMethylDset.attrs["sparse"] = False # bMethylDset.attrs["binary"] = True -# logging.debug("Done:\t Getting Binarized Methyl Data") +# logging.debug("Done:\t Getting Binarized Methyl data") # -# logging.debug("Start:\t Getting Binned Methyl Data") +# logging.debug("Start:\t Getting Binned Methyl data") # lenBins = 2038 # nbBins = 16 # overlapping = 442 @@ -948,7 +948,7 @@ def getClassicDBcsv(views, pathF, nameDB, NB_CLASS, askedLabelsNames, # binnedMethyl.attrs["name"] = "bMethyl" + str(nbBins) # binnedMethyl.attrs["sparse"] = False # binnedMethyl.attrs["binary"] = True -# logging.debug("Done:\t Getting Binned Methyl Data") +# logging.debug("Done:\t Getting Binned Methyl data") # # labelFile = open(path + 'brca_labels_triple-negatif.csv') # labels = np.array([int(line.strip().split(',')[1]) for line in labelFile]) @@ -977,16 +977,16 @@ def getClassicDBcsv(views, pathF, nameDB, NB_CLASS, askedLabelsNames, # def getModifiedMultiOmicDBcsv(features, path, name, NB_CLASS, LABELS_NAMES): # datasetFile = h5py.File(path + "ModifiedMultiOmic.hdf5", "w") # -# logging.debug("Start:\t Getting Methylation Data") +# logging.debug("Start:\t Getting Methylation data") # methylData = np.genfromtxt(path + "matching_methyl.csv", delimiter=',') # methylDset = datasetFile.create_dataset("View0", methylData.shape) # methylDset[...] = methylData # methylDset.attrs["name"] = "Methyl_" # methylDset.attrs["sparse"] = False # methylDset.attrs["binary"] = False -# logging.debug("Done:\t Getting Methylation Data") +# logging.debug("Done:\t Getting Methylation data") # -# logging.debug("Start:\t Getting Sorted Methyl Data") +# logging.debug("Start:\t Getting Sorted Methyl data") # Methyl = datasetFile["View0"][...] 
# sortedMethylGeneIndices = np.zeros(datasetFile.get("View0").shape, dtype=int) # MethylRanking = np.zeros(datasetFile.get("View0").shape, dtype=int) @@ -1001,9 +1001,9 @@ def getClassicDBcsv(views, pathF, nameDB, NB_CLASS, askedLabelsNames, # mMethylDset.attrs["name"] = "SMethyl" # mMethylDset.attrs["sparse"] = False # mMethylDset.attrs["binary"] = False -# logging.debug("Done:\t Getting Sorted Methyl Data") +# logging.debug("Done:\t Getting Sorted Methyl data") # -# logging.debug("Start:\t Getting Binarized Methyl Data") +# logging.debug("Start:\t Getting Binarized Methyl data") # k = findClosestPowerOfTwo(58) - 1 # try: # factorizedLeftBaseMatrix = np.genfromtxt( @@ -1021,9 +1021,9 @@ def getClassicDBcsv(views, pathF, nameDB, NB_CLASS, askedLabelsNames, # bMethylDset.attrs["name"] = "BMethyl" # bMethylDset.attrs["sparse"] = False # bMethylDset.attrs["binary"] = True -# logging.debug("Done:\t Getting Binarized Methyl Data") +# logging.debug("Done:\t Getting Binarized Methyl data") # -# logging.debug("Start:\t Getting Binned Methyl Data") +# logging.debug("Start:\t Getting Binned Methyl data") # lenBins = 2095 # nbBins = 58 # overlapping = 1676 @@ -1043,18 +1043,18 @@ def getClassicDBcsv(views, pathF, nameDB, NB_CLASS, askedLabelsNames, # binnedMethyl.attrs["name"] = "bMethyl" # binnedMethyl.attrs["sparse"] = False # binnedMethyl.attrs["binary"] = True -# logging.debug("Done:\t Getting Binned Methyl Data") +# logging.debug("Done:\t Getting Binned Methyl data") # -# logging.debug("Start:\t Getting MiRNA Data") +# logging.debug("Start:\t Getting MiRNA data") # mirnaData = np.genfromtxt(path + "matching_mirna.csv", delimiter=',') # mirnaDset = datasetFile.create_dataset("View1", mirnaData.shape) # mirnaDset[...] = mirnaData # mirnaDset.attrs["name"] = "MiRNA__" # mirnaDset.attrs["sparse"] = False # mirnaDset.attrs["binary"] = False -# logging.debug("Done:\t Getting MiRNA Data") +# logging.debug("Done:\t Getting MiRNA data") # -# logging.debug("Start:\t Getting Sorted MiRNA Data") +# logging.debug("Start:\t Getting Sorted MiRNA data") # MiRNA = datasetFile["View1"][...] 
# sortedMiRNAGeneIndices = np.zeros(datasetFile.get("View1").shape, dtype=int) # MiRNARanking = np.zeros(datasetFile.get("View1").shape, dtype=int) @@ -1069,9 +1069,9 @@ def getClassicDBcsv(views, pathF, nameDB, NB_CLASS, askedLabelsNames, # mmirnaDset.attrs["name"] = "SMiRNA_" # mmirnaDset.attrs["sparse"] = False # mmirnaDset.attrs["binary"] = False -# logging.debug("Done:\t Getting Sorted MiRNA Data") +# logging.debug("Done:\t Getting Sorted MiRNA data") # -# logging.debug("Start:\t Getting Binarized MiRNA Data") +# logging.debug("Start:\t Getting Binarized MiRNA data") # k = findClosestPowerOfTwo(517) - 1 # try: # factorizedLeftBaseMatrix = np.genfromtxt( @@ -1089,9 +1089,9 @@ def getClassicDBcsv(views, pathF, nameDB, NB_CLASS, askedLabelsNames, # bmirnaDset.attrs["name"] = "BMiRNA_" # bmirnaDset.attrs["sparse"] = False # bmirnaDset.attrs["binary"] = True -# logging.debug("Done:\t Getting Binarized MiRNA Data") +# logging.debug("Done:\t Getting Binarized MiRNA data") # -# logging.debug("Start:\t Getting Binned MiRNA Data") +# logging.debug("Start:\t Getting Binned MiRNA data") # lenBins = 14 # nbBins = 517 # overlapping = 12 @@ -1111,9 +1111,9 @@ def getClassicDBcsv(views, pathF, nameDB, NB_CLASS, askedLabelsNames, # binnedMiRNA.attrs["name"] = "bMiRNA_" # binnedMiRNA.attrs["sparse"] = False # binnedMiRNA.attrs["binary"] = True -# logging.debug("Done:\t Getting Binned MiRNA Data") +# logging.debug("Done:\t Getting Binned MiRNA data") # -# logging.debug("Start:\t Getting RNASeq Data") +# logging.debug("Start:\t Getting RNASeq data") # rnaseqData = np.genfromtxt(path + "matching_rnaseq.csv", delimiter=',') # uselessRows = [] # for rowIndex, row in enumerate(np.transpose(rnaseqData)): @@ -1125,9 +1125,9 @@ def getClassicDBcsv(views, pathF, nameDB, NB_CLASS, askedLabelsNames, # rnaseqDset.attrs["name"] = "RNASeq_" # rnaseqDset.attrs["sparse"] = False # rnaseqDset.attrs["binary"] = False -# logging.debug("Done:\t Getting RNASeq Data") +# logging.debug("Done:\t Getting RNASeq data") # -# logging.debug("Start:\t Getting Sorted RNASeq Data") +# logging.debug("Start:\t Getting Sorted RNASeq data") # RNASeq = datasetFile["View2"][...] 
# sortedRNASeqGeneIndices = np.zeros(datasetFile.get("View2").shape, dtype=int) # RNASeqRanking = np.zeros(datasetFile.get("View2").shape, dtype=int) @@ -1142,9 +1142,9 @@ def getClassicDBcsv(views, pathF, nameDB, NB_CLASS, askedLabelsNames, # mrnaseqDset.attrs["name"] = "SRNASeq" # mrnaseqDset.attrs["sparse"] = False # mrnaseqDset.attrs["binary"] = False -# logging.debug("Done:\t Getting Sorted RNASeq Data") +# logging.debug("Done:\t Getting Sorted RNASeq data") # -# logging.debug("Start:\t Getting Binarized RNASeq Data") +# logging.debug("Start:\t Getting Binarized RNASeq data") # k = findClosestPowerOfTwo(100) - 1 # try: # factorizedLeftBaseMatrix = np.genfromtxt( @@ -1163,9 +1163,9 @@ def getClassicDBcsv(views, pathF, nameDB, NB_CLASS, askedLabelsNames, # brnaseqDset.attrs["name"] = "BRNASeq" # brnaseqDset.attrs["sparse"] = False # brnaseqDset.attrs["binary"] = True -# logging.debug("Done:\t Getting Binarized RNASeq Data") +# logging.debug("Done:\t Getting Binarized RNASeq data") # -# logging.debug("Start:\t Getting Binned RNASeq Data") +# logging.debug("Start:\t Getting Binned RNASeq data") # lenBins = 986 # nbBins = 142 # overlapping = 493 @@ -1185,18 +1185,18 @@ def getClassicDBcsv(views, pathF, nameDB, NB_CLASS, askedLabelsNames, # binnedRNASeq.attrs["name"] = "bRNASeq" # binnedRNASeq.attrs["sparse"] = False # binnedRNASeq.attrs["binary"] = True -# logging.debug("Done:\t Getting Binned RNASeq Data") +# logging.debug("Done:\t Getting Binned RNASeq data") # -# logging.debug("Start:\t Getting Clinical Data") +# logging.debug("Start:\t Getting Clinical data") # clinical = np.genfromtxt(path + "clinicalMatrix.csv", delimiter=',') # clinicalDset = datasetFile.create_dataset("View3", clinical.shape) # clinicalDset[...] = clinical # clinicalDset.attrs["name"] = "Clinic_" # clinicalDset.attrs["sparse"] = False # clinicalDset.attrs["binary"] = False -# logging.debug("Done:\t Getting Clinical Data") +# logging.debug("Done:\t Getting Clinical data") # -# logging.debug("Start:\t Getting Binarized Clinical Data") +# logging.debug("Start:\t Getting Binarized Clinical data") # binarized_clinical = np.zeros((347, 1951), dtype=np.uint8) # nb_already_done = 0 # for feqtureIndex, feature in enumerate(np.transpose(clinical)): @@ -1210,9 +1210,9 @@ def getClassicDBcsv(views, pathF, nameDB, NB_CLASS, askedLabelsNames, # bClinicalDset.attrs["name"] = "bClinic" # bClinicalDset.attrs["sparse"] = False # bClinicalDset.attrs["binary"] = True -# logging.debug("Done:\t Getting Binarized Clinical Data") +# logging.debug("Done:\t Getting Binarized Clinical data") # -# # logging.debug("Start:\t Getting Adjacence RNASeq Data") +# # logging.debug("Start:\t Getting Adjacence RNASeq data") # # sparseAdjRNASeq = getAdjacenceMatrix(RNASeqRanking, sortedRNASeqGeneIndices, k=findClosestPowerOfTwo(10)-1) # # sparseAdjRNASeqGrp = datasetFile.create_group("View6") # # dataDset = sparseAdjRNASeqGrp.create_dataset("data", sparseAdjRNASeq.data.shape, data=sparseAdjRNASeq.data) @@ -1223,7 +1223,7 @@ def getClassicDBcsv(views, pathF, nameDB, NB_CLASS, askedLabelsNames, # # sparseAdjRNASeqGrp.attrs["name"]="ARNASeq" # # sparseAdjRNASeqGrp.attrs["sparse"]=True # # sparseAdjRNASeqGrp.attrs["shape"]=sparseAdjRNASeq.shape -# # logging.debug("Done:\t Getting Adjacence RNASeq Data") +# # logging.debug("Done:\t Getting Adjacence RNASeq data") # # labelFile = open(path + 'brca_labels_triple-negatif.csv') # labels = np.array([int(line.strip().split(',')[1]) for line in labelFile]) diff --git a/multiview_platform/tests/test_ExecClassif.py 
b/multiview_platform/tests/test_ExecClassif.py
index 9da27eef..3420821a 100644
--- a/multiview_platform/tests/test_ExecClassif.py
+++ b/multiview_platform/tests/test_ExecClassif.py
@@ -412,7 +412,7 @@ class Test_execOneBenchmark_multicore(unittest.TestCase):
# help='Name of the views selected for learning (default: %(default)s)',
# default=[''])
# groupStandard.add_argument('--pathF', metavar='STRING', action='store', help='Path to the views (default: %(default)s)',
-# default='/home/bbauvin/Documents/Data/Data_multi_omics/')
+# default='/home/bbauvin/Documents/data/Data_multi_omics/')
# groupStandard.add_argument('--nice', metavar='INT', action='store', type=int,
# help='Niceness for the process', default=0)
# groupStandard.add_argument('--randomState', metavar='STRING', action='store',
diff --git a/multiview_platform/tests/tmp_tests/test_file.hdf5 b/multiview_platform/tests/tmp_tests/test_file.hdf5
new file mode 100644
index 0000000000000000000000000000000000000000..61b8ac6df7647609d0c294b06be19ce42362af47
GIT binary patch
literal 2072
zcmeD5aB<`1lHy_j0S*oZ76t(j3y%LofdmIc2+I8r;W02IKpBisx&unDV1h6h89<PM
zK?1^M5QLhKt}Z0V)s=yPkpX5tjD~7sFkpeOpw57BM#&Kq0v@i80U*ytfCvT#Xewf0
zH~~#(P+<n2#H7@mVo*wgh<HFKvPnh;HU@Co1`{B2At{-W5hB10H5ow*fYl3dFfc&U
zssmUBD+32uoQa7EtdRqnBbXVX%AkC(Vg?2VA*e7&vBKGoSO$g$nACvM!G8WOpd1bH
e8v}y^%;N*<s!_{FLtr!nC<y^jIRnxUD^3Bs=P%v>

literal 0
HcmV?d00001

diff --git a/setup.py b/setup.py
index 08366b42..8ed4b6c4 100644
--- a/setup.py
+++ b/setup.py
@@ -54,7 +54,7 @@ def setup_package():
# A URL pointing to the library's official page
url='http://github.com/babau1/multiview-machine-learning-omis/',
install_requires=['numpy>=1.8', 'scipy>=0.16','scikit-learn==0.19',
- 'h5py', 'joblib', 'pyscm', 'pandas'],
+ 'h5py', 'joblib', 'pyscm', 'pandas', 'm2r'],
# It is customary to add some metadata about the library
# so that indexing robots can classify it easily.
# The list of allowed classifiers is long:
@@ -80,7 +80,7 @@ def setup_package():
# The syntax is "command-name-to-create = package.module:function".
entry_points={
'console_scripts': [
- 'exec_multiview = multiview_platform.Exec:Exec',
+ 'exec_multiview = multiview_platform.execute:exec',
],
},
--
GitLab
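
Throughout the commented-out loaders in `get_multiview_db.py`, every view is written with the same h5py pattern: a `View<n>` dataset whose `name`, `sparse` and `binary` attributes describe it. Below is a minimal sketch of that pattern, with a hypothetical file name and random data standing in for a real omic matrix:

```python
import h5py
import numpy as np

# Hypothetical stand-in for one (examples x features) omic view.
view_data = np.random.rand(347, 100)

with h5py.File("example_dataset.hdf5", "w") as dataset_file:
    # One HDF5 dataset per view, as in the loaders above.
    view_dset = dataset_file.create_dataset("View0", view_data.shape)
    view_dset[...] = view_data
    view_dset.attrs["name"] = "Methyl_"  # display name of the view
    view_dset.attrs["sparse"] = False    # dense matrix
    view_dset.attrs["binary"] = False    # real-valued features
```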
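
The "Sorted" views (`SMethyl`, `SMiRNA_`, `SRNASeq`) rest on two intermediate matrices: for each example, the feature indices sorted by value, and the rank each feature receives under that sort (`MethylRanking[exampleIndex, sortedMethylIndicesArray[geneIndex]] = geneIndex`). The per-example loops above can be collapsed with `np.argsort`; this is a sketch of the construction, not the platform's own code:

```python
import numpy as np

def rank_features(view):
    """Return (sorted_indices, ranking) for an (examples x features) view.

    sorted_indices[i] lists the feature indices of example i from smallest
    to largest value; ranking[i, j] is the rank of feature j in example i.
    """
    sorted_indices = np.argsort(view, axis=1)
    ranking = np.empty_like(sorted_indices)
    rows = np.arange(view.shape[0])[:, None]
    # Per-row inverse permutation, mirroring the double loop in the loaders.
    ranking[rows, sorted_indices] = np.arange(view.shape[1])
    return sorted_indices, ranking
```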
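
The binned views (`bMethyl`, `bMiRNA_`, `bRNASeq`) are parameterized by a bin length, a bin count and an overlap (e.g. `lenBins = 3298`, `nbBins = 9`, `overlapping = 463`), but the loop that actually fills them is elided from these hunks. The sketch below is one plausible reading, assuming each bin is an overlapping window over the feature ranks computed above; the real implementation may differ:

```python
import numpy as np

def bin_ranks(ranking, len_bins, nb_bins, overlapping):
    """Hypothetical reconstruction: binary flags saying, per feature,
    which overlapping rank windows it falls into."""
    n_examples, n_features = ranking.shape
    step = len_bins - overlapping  # stride between consecutive windows
    binned = np.zeros((n_examples, n_features * nb_bins), dtype=np.uint8)
    for b in range(nb_bins):
        low, high = b * step, b * step + len_bins
        in_bin = (ranking >= low) & (ranking < high)
        binned[:, b * n_features:(b + 1) * n_features] = in_bin
    return binned
```

The `uint8` 0/1 output is consistent with the `binary = True` attribute these views carry, but the window boundaries here are guesses.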
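
On the packaging side, the `setup.py` hunk retargets the `console_scripts` entry point from the old `Exec` module to the renamed `execute` module. After `pip install`, setuptools generates an `exec_multiview` command that behaves roughly like the wrapper below (assuming the patched `multiview_platform/execute.py` really exposes an `exec` function, as the entry-point string claims; `exec` is only a reserved word in Python 2, so the name is legal in Python 3):

```python
import sys

# "exec_multiview = multiview_platform.execute:exec" means: import the
# module left of the colon, then call the attribute right of it.
from multiview_platform.execute import exec as run_platform

if __name__ == "__main__":
    sys.exit(run_platform())
```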