From de15bfa20b20c2818cffedce9174867852e86f68 Mon Sep 17 00:00:00 2001
From: Charly Lamothe <charly.lamothe@sfr.fr>
Date: Wed, 9 Oct 2019 02:27:10 +0200
Subject: [PATCH] Add Fawagreh2015 desc

---
 reports/bolsonaro.tex        |  2 ++
 reports/bolsonaro_biblio.bib | 18 +++++++++++++++++-
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/reports/bolsonaro.tex b/reports/bolsonaro.tex
index 04b5827..9c83998 100644
--- a/reports/bolsonaro.tex
+++ b/reports/bolsonaro.tex
@@ -75,6 +75,8 @@ where: $cor_{t_i, t_j} = correltion(predict_{t_i}, predict_{t_j} ) $ is the corr
 \item $measure_3$
 \end{itemize}
 For the experiments, they use breast cancer prognosis. They reduce the size of a forest of 100 trees to a forest of on average 26 trees keeping the same error rate.
+
+\item \cite{Fawagreh2015}: The goal is to obtain a much smaller forest while preserving accuracy and diversity. To do so, they use a clustering algorithm. Let $C(t_i, T) = \{c_{i1}, \dots, c_{im}\}$ denote the vector of class labels obtained by having tree $t_i$ classify the training set $T$ of size $m$, with $t_i \in F$, where $F$ is the forest of size $n$. Let $\mathcal{C} = \bigcup^n_{i=1} C(t_i, T)$ be the super vector gathering the class vectors of all trees. A clustering algorithm is then applied to $\mathcal{C}$ to find $k = \sqrt{\frac{n}{2}}$ clusters. Finally, the pruned forest $F'$ is the union, over all clusters, of the most representative tree of each cluster. So if you start with 100 trees and obtain 7 clusters, the final forest contains 7 trees. They obtain performances at least similar to those of the regular random forest algorithm.
 \end{itemize}
 
 
diff --git a/reports/bolsonaro_biblio.bib b/reports/bolsonaro_biblio.bib
index b4e1683..94efbec 100644
--- a/reports/bolsonaro_biblio.bib
+++ b/reports/bolsonaro_biblio.bib
@@ -11,4 +11,20 @@
 @article{Zhang,
 title={Search for the smallest random forest},
 author={Zhang, Heping and Wang, Minghui}
-}
\ No newline at end of file
+}
+
+@article{Fawagreh2015,
+  author = {{Fawagreh}, Khaled and {Medhat Gaber}, Mohamad and {Elyan}, Eyad},
+  title = {On Extreme Pruning of Random Forest Ensembles for Real-time Predictive Applications},
+  journal = {arXiv e-prints},
+  keywords = {Computer Science - Machine Learning},
+  year = {2015},
+  month = {Mar},
+  eid = {arXiv:1503.04996},
+  pages = {arXiv:1503.04996},
+  archivePrefix = {arXiv},
+  eprint = {1503.04996},
+  primaryClass = {cs.LG},
+  adsurl = {https://ui.adsabs.harvard.edu/abs/2015arXiv150304996F},
+  adsnote = {Provided by the SAO/NASA Astrophysics Data System}
+}
--
GitLab
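
A minimal sketch of the pruning procedure described in the added paragraph, for illustration only. It assumes scikit-learn's RandomForestClassifier as the base forest, plain k-means as the clustering algorithm (the description above does not fix one), and the tree whose label vector is closest to its cluster centroid as the "most representative" tree per cluster; the dataset and the helper name prune_forest are illustrative placeholders, not taken from Fawagreh2015.

# Sketch of clustering-based forest pruning; assumptions are listed above.
import numpy as np
from sklearn.cluster import KMeans
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split


def prune_forest(forest, X_train):
    """Return indices of the k = sqrt(n/2) most representative trees."""
    trees = forest.estimators_
    n = len(trees)
    # C(t_i, T): row i is the vector of class labels tree t_i assigns to T.
    C = np.array([t.predict(X_train) for t in trees])
    k = max(1, int(round((n / 2) ** 0.5)))
    km = KMeans(n_clusters=k, n_init=10, random_state=0).fit(C)
    kept = []
    for c in range(k):
        members = np.where(km.labels_ == c)[0]
        # Representative tree: the member whose label vector is closest to
        # the cluster centroid (an assumption, see the note above).
        dists = np.linalg.norm(C[members] - km.cluster_centers_[c], axis=1)
        kept.append(int(members[np.argmin(dists)]))
    return kept


if __name__ == "__main__":
    # Illustrative dataset only; any classification task would do.
    X, y = load_breast_cancer(return_X_y=True)
    X_tr, X_te, y_tr, y_te = train_test_split(X, y, random_state=0)
    rf = RandomForestClassifier(n_estimators=100, random_state=0).fit(X_tr, y_tr)
    kept = prune_forest(rf, X_tr)  # roughly sqrt(100/2) ~ 7 trees out of 100
    # Majority vote over the kept trees only (labels here are 0/1).
    votes = np.array([rf.estimators_[i].predict(X_te) for i in kept])
    y_pred = (votes.mean(axis=0) >= 0.5).astype(int)
    print(f"kept {len(kept)}/100 trees, accuracy = {(y_pred == y_te).mean():.3f}")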