From 0c8af0ef7e4ba91036a094dd25246fff2ddb5b10 Mon Sep 17 00:00:00 2001 From: "farah.cherfaoui" <farah.cherfaoui.lis-lab.fr> Date: Sun, 6 Oct 2019 09:59:24 +0200 Subject: [PATCH] add description of a paper --- reports/bolsonaro.tex | 36 +++++++++++++++++++++++++++++++++--- reports/bolsonaro_biblio.bib | 17 +++++++++++++++++ 2 files changed, 50 insertions(+), 3 deletions(-) create mode 100644 reports/bolsonaro_biblio.bib diff --git a/reports/bolsonaro.tex b/reports/bolsonaro.tex index 2d9ae63..9ea3d5b 100644 --- a/reports/bolsonaro.tex +++ b/reports/bolsonaro.tex @@ -20,11 +20,12 @@ \maketitle -\section{Notation} +\section{Introduction} +\subsection{Notation} $S = \{(x_i, y_i)\}^n_{i=1}$ the dataset, with $x_i \in X$ and $y_i \in Y$. $T = \{t_1, t_2, \dots, t_d\}$ the random forest of $d$ trees, such that $t_j : X \rightarrow Y$. - -\section{Orthogonal Matching Pursuit (OMP)} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\subsection{Orthogonal Matching Pursuit (OMP)} $y \in \mathbb{R}^n$ a signal. $D \in \mathbb{R}^{n \times d}$ a dictionnary with $d_j \in \mathbb{R^n}$. Goal: find $w \in \mathbb{R}^d$, such that $y = Dw$ and $||w||_0 < k$. $\text{span}(\{v_1, \dots, v_n\}) \{u : u = \sum^n_{i=1} \alpha_i v_i \ | \ \alpha_i \in \mathbb{R}\}$. @@ -43,4 +44,33 @@ $y \in \mathbb{R}^n$ a signal. $D \in \mathbb{R}^{n \times d}$ a dictionnary wit \end{algorithmic} \end{algorithm} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\subsection{Our problem} + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\section{Related Work} +\begin{itemize} +\item \cite{Yang2012}: once the forest $(F = t_1, \dots, t_n)$ is built, he gives each tree a score (which measures the importance of the tree in the forest). The tree with the lowest score is removed from the forest. To eliminate the next tree, all the scores are recomputed, and the tree with the lowest score is removed...\\ +They present in this paper 4 different tree's score. 
For each tree $t_i$, we compute: +\begin{itemize} +\item $score_1 = mean_{(x,y) \in train}( margin((x,y), F) - margin((x,y),F\backslash t_i))$ +\item $score_2 = \min_{(x,y) \in train}( margin((x,y), F)) - \min_{(x,y) \in train} (margin((x,y), F\backslash t_i))$ +%\item $score_3 = min_{(x,y) \in train}( margin((x,y), F) - min_{(x,y) \in train} (margin(F\backslash t_i)))$ +\end{itemize} +where: +$$ margin((x, y), F) = \frac{1}{|F|} \left( \sum_{i = 1}^{|F|} I(t_i(x) = y) - \max_{l \neq y} \sum_{i = 1}^{|F|} I(t_i(x) = l) \right)$$ +They run experiments on several UCI classification data sets (most of them binary classification), with different numbers of attributes (from 5 to 61): Diabetes, Heart, Hearts, Iris, Ionosphere, Monks, Sonar, Steel, Tic, Wine. +They construct a random forest model of size 100, then prune it with their algorithm and obtain a smaller forest with size ranging from 99 to 20. The performances of their algorithms are compared with random forest models of the corresponding sizes (i.e. forests directly constructed with sizes 99 to 20). +On all the data sets except colon and diabetes, the more trees are pruned, the better the performance. +They do not show the variance of the models. They also compare their method with similarity-based pruning (Sim-P) and distance minimization (MarDistM). Except for diabetes, their method outperforms the other two algorithms. 
+% +\item \cite{Ren2015}: coming soon :-) +\end{itemize} + + + \section{Reference} + +\nocite{*} +\bibliographystyle{plain} +\bibliography{bolsonaro_biblio} \end{document} diff --git a/reports/bolsonaro_biblio.bib b/reports/bolsonaro_biblio.bib new file mode 100644 index 0000000..5e661c8 --- /dev/null +++ b/reports/bolsonaro_biblio.bib @@ -0,0 +1,17 @@ +@article{Yang2012, + title={Margin optimization based pruning for random forest}, + author={Yang, Fan and Lu, Wei-hang and Luo, Lin-kai and Li, Tao}, + journal={Neurocomputing}, + volume={94}, + pages={54--63}, + year={2012}, + publisher={Elsevier} +} + +@inproceedings{Ren2015, + title={Global refinement of random forest}, + author={Ren, Shaoqing and Cao, Xudong and Wei, Yichen and Sun, Jian}, + booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition}, + pages={723--730}, + year={2015} +} \ No newline at end of file -- GitLab