Yahoo: hurrayforanime99 ECCV2006-Final.pdf "This paper introduces a nonparametric approach based on the meanshift algorithm for overcoming the limitations of the EM algorithm." Excerpts from the paper: Section 1 - Introduction The Expectation Maximization algorithm (EM) [1] perhaps is the most fre- quently used parametric technique for estimating probability density functions (PDF) in both univariate and multivariate cases. It has been widely applied in computer vision [2], and pattern recognition [3] applications. In all of these areas, EM is used to model the PDF of a set of feature vectors using a given parametric model. Usually a mixture of Gaussians with a finite number of components is used to approximate the density function. The main advantage of EM is that it provides a closed-form analytical representation of the PDF. However, EM suffers a few limitations that will be discussed later. [...snip...] Section 2.1 - Expectation Maximization [...snip...] Several attempts have been made to overcome the drawbacks of the EM algorithm. Figuerideo and Jain [4] broadly classify these methods into two cate- gories: deterministic approaches and stochastic approaches. Deterministic meth- ods, such as [5] and [6], are based on selecting the number of components according to some model selection criterion, which usually contains an increasing function that penalizes higher number of components. In [7] and [8] stochastic approaches based on Markov Chain Monte Carlo methods are used. Section 2.2 - The Mean-Shift The mode-finding algorithm introduced in [9] which is based on the mean-shift algorithm [10], compared to the k-means, consistently converges to the modes of the underlying density function. Therefore, for example, when applied to the data set in Fig. 1, the mean-shift successfully finds the two local maximum at [0, -10]T and [0, 10]T respectively. [...snip...] Section 3 - The Penalty-Less Information Criterion [...snip...] where N i is the number of vectors in cluster i, Hj is the adaptive bandwidth matrix of vector j, xi is vector number j of cluster i and K(.) is the kernel function. Since computing the kernel-based estimate of the PDF is computationally prohibitive in higher dimensions, we use the Improved Fast Guass Transform [11], which significantly reduces the complexity of the problem. The adaptive bandwidth is computed using the sample-point estimator of [12]. In this paper, we use the standard multivariate Gaussian as the kernel function. We define the Penalty-less Information Criterion (PIC ) of a model with k components as the sum of weighted Jensen-Shannon divergence [13] between piEM and piKDE for all clusters as follows [...snip...] Section 4 - Nonparametric EM Using Mixtures of Mixtures Let Y = {xi }M be a set of M vectors to be modeled. Here we use Y instead i=1 of X to denote the entire data set for reasons that will become clear later on. If we apply the mean-shift mode finding algorithm, proposed in [9], and only retain the modes with positive definite Hessian, we will obtain a set of m modes Yc = {xcj }m which represent the local maxima points of the density function, j=1 where m M . For details on computing the Hessian, the reader is referred to Han et al.'s method [14]. [...snip...] where Pj is the Hessian of mode j. The rationale here, as explained in [14] is to replace the covariance matrix, which may not be accurate at this point, by the Hessian which represents the local curvature around the mode xcj . Each vector is then assigned to a specific mode according to Equation 11. 14 references: 1. dempster1977 The expectation maximization algorithm is popular but has some problems which this paper addresses. 2. 939161 Example of widespread use of EM in computer vision. 3. abunaser1998 Example of widespread use of EM in pattern recognition. 4. 507482 Classifies attempts to improve drawbacks of EM into two categories: deterministic and stochastic approaches. 5. dasgupta1998 An example of a deterministic method for improving EM. 6. campbell1997 An example of a deterministic method for improving EM. 7. neal1992 An example of a stochastic method based on MCMC for improving EM. 8. 599262 An example of a stochastic method based on MCMC for improving EM. 9. 513076 A mode-finding algorithm based on the mean-shift algorithm consistently finds modes but is unable to infer the hidden structure of the data. 10. fukunaga1975 The mean-shift algorithm (used by reference #9). 11. 946593 The Improved Fast Gauss Transform. Used to for reducing complexity of estimating the PDF in high dimensions. 12. joneswand1995 Sample-point estimator. Used to compute the "adaptive bandwidth." 13. lin1991 Jensen-Shannon divergence. Used in the definition of the Penalty-less Information Criterion. 14. han2004 Method for computing the Hessian. @inproceedings{waeldensity2006, author = {Wael Abd-Almageed and Larry Davis}, title = {Density Estimation using Mixtures of Mixtures of Gaussians}, booktitle = {European Conference on Computer Vision}, year = {2006}, issn = {0302-9743}, pages = {410--422}, publisher = {Springer-Verlag Berlin Heidelberg}, } # { # http://citeseer.ist.psu.edu/context/828/0 @article{dempster1977, author = {A.P. Dempster and N.M. Laird and D.B. Rubin}, title = {Maximum likelihood from incomplete data via the EM algorithm}, year = {1977}, journal = {Journal of the Royal Statistical Society}, volume = {39}, number = {1}, pages = {1--38}, } # http://portal.acm.org/citation.cfm?id=939161 @inproceedings{939161, author = {Serge Belongie and Chad Carson and Hayit Greenspan and Jitendra Malik}, title = {Color- and Texture-Based Image Segmentation Using EM and Its Application to Content-Based Image Retrieval}, booktitle = {ICCV '98: Proceedings of the Sixth International Conference on Computer Vision}, year = {1998}, isbn = {81-7319-221-9}, pages = {675}, publisher = {IEEE Computer Society}, address = {Washington, DC, USA}, } # abu-naser_impulse_retoration.pdf # http://adsabs.harvard.edu/abs/1998JOSAA..15.2327A @ARTICLE{abunaser1998, author = {{Abu-Naser}, A. and {Galatsanos}, N.~P. and {Wernick}, M.~N. and {Schonfeld}, D.}, title = {Object recognition based on impulse restoration with use of the expectation-maximization algorithm}, journal = {Journal of the Optical Society of America A}, year = {1998}, month = {sep}, volume = {15}, pages = {2327--2340}, adsurl = {http://adsabs.harvard.edu/cgi-bin/nph-bib_query?bibcode=1998JOSAA..15.2327A&db_key=PHY}, adsnote = {Provided by the Smithsonian/NASA Astrophysics Data System} } # http://portal.acm.org/citation.cfm?id=507482 # 62 (?) references: # 2. 599262 # 10. campbell1997 # 17. dasgupta1998 # 40. neal1992 @article{507482, author = {Mario A. T. Figueiredo and Anil K. Jain}, title = {Unsupervised Learning of Finite Mixture Models}, journal = {IEEE Trans. Pattern Anal. Mach. Intell.}, volume = {24}, number = {3}, year = {2002}, issn = {0162-8828}, pages = {381--396}, doi = {http://dx.doi.org/10.1109/34.990138}, publisher = {IEEE Computer Society}, address = {Washington, DC, USA}, } @article{dasgupta1998, author = {A. Dasgupta and A. Raftery}, title = {Detecting Features in Spatial Point Patterns with Clutter Via Model-Based Clustering}, journal = {J.~Am.~Statistical Assoc.}, volume = {93}, year = {1998}, pages = {294--302}, } # http://www.stat.washington.edu/raftery/Research/PDF/campbell1997.pdf # http://citeseer.ist.psu.edu/108342.html @article{campbell1997, author = {J. Campbell and C. Fraley and F. Murtagh and and A. Raftery}, title = {Linear Flaw Detection in Woven Textiles Using Model-Based Clustering}, journal = {Pattern Recognition Letters}, volume = {18}, year = {1997}, pages = {1539--1548}, } # http://www.cs.toronto.edu/~radford/bmm-maxent.abstract.html # Related: http://www.cs.toronto.edu/~radford/ftp/bmm.pdf @inproceedings{neal1992, author = {Radford M. Neal}, title = {Bayesian Mixture Modeling}, booktitle = {Maximum Entropy and Bayesian Methods: Proceedings of the 11th International Workshop on Maximum Entropy and Bayesian Methods of Statistical Analysis}, year = {1992}, issn = {0302-9743}, pages = {197--211}, publisher = {Dordrecht: Kluwer Academic Publishers}, address = {Seattle}, } # http://www.stat.washington.edu/raftery/Research/PDF/bensmail1997.pdf # http://citeseer.ist.psu.edu/47204.html # http://portal.acm.org/citation.cfm?id=599262 @article{599262, author = {Halima Bensmail and Gilles Celeux and Adrian E. Raftery and Christian P. Robert}, title = {Inference in model-based cluster analysis}, journal = {Statistics and Computing}, volume = {7}, number = {1}, year = {1997}, issn = {0960-3174}, pages = {1--10}, doi = {http://dx.doi.org/10.1023/A:1018510926151}, publisher = {Kluwer Academic Publishers}, address = {Hingham, MA, USA}, } # http://portal.acm.org/citation.cfm?id=513076 # http://www.caip.rutgers.edu/~comanici/Papers/MsRobustApproach.pdf # mean_shift_robust_approach.pdf # 67 References @article{513076, author = {Dorin Comaniciu and Peter Meer}, title = {Mean Shift: A Robust Approach Toward Feature Space Analysis}, journal = {IEEE Trans. Pattern Anal. Mach. Intell.}, volume = {24}, number = {5}, year = {2002}, issn = {0162-8828}, pages = {603--619}, doi = {http://dx.doi.org/10.1109/34.1000236}, publisher = {IEEE Computer Society}, address = {Washington, DC, USA}, } # http://ieeexplore.ieee.org/xpl/freeabs_all.jsp?arnumber=1055330 @article{fukunaga1975, author = {Fukunaga, K. and Hostetler, L.}, title = {The estimation of the gradient of a density function, with applications in pattern recognition}, journal = {IEEE Trans.~on Information Theory}, volume = {21}, number = {1}, year = {1975}, month = {Jan}, issn = {0018-9448}, pages = {32--40}, } # http://portal.acm.org/citation.cfm?id=946593 # http://citeseer.ist.psu.edu/yang03improved.html # http://ieeexplore.ieee.org/xpl/freeabs_all.jsp?arnumber=1238383 # http://umiacs.umd.edu/~ramani/pubs/0464_yang.pdf @inproceedings{946593, author = {Changjiang Yang and Ramani Duraiswami and Nail A. Gumerov and Larry Davis}, title = {Improved Fast Gauss Transform and Efficient Kernel Density Estimation}, booktitle = {ICCV '03: Proceedings of the Ninth IEEE International Conference on Computer Vision}, year = {2003}, isbn = {0-7695-1950-4}, pages = {464}, publisher = {IEEE Computer Society}, address = {Washington, DC, USA}, } # NOTE: R provides functions described in this book, e.g., # http://lib.stat.cmu.edu/R/CRAN/doc/packages/KernSmooth.pdf @book{joneswand1995, author = {M. Chris Jones and Matt P. Wand}, title = {Kernel Smoothing}, publisher = {Chapman and Hall}, year = {1995}, } # http://www.mai.liu.se/~tikos/kurser/TAMS23/lindiv.pdf # http://ieeexplore.ieee.org/xpls/abs_all.jsp?arnumber=61115 @article{lin1991, title = {Divergence measures based on the Shannon entropy}, author = {Jianhua Lin}, journal = {IEEE Transactions on Information Theory}, number = {1}, pages = {145--151}, volume = {37}, year = {1991}, } # http://www.cs.umd.edu/~bhhan/papers/cvpr2004.pdf # http://citeseer.ist.psu.edu/han04incremental.html @article{han2004, author = {Bohyung Han and Dorin Comaniciu and Ying Zhu and Larry Davis}, title = {Incremental Density Approximation and Kernel-Based Bayesian Filtering for Object Tracking}, journal = {cvpr}, volume = {01}, year = {2004}, issn = {1063-6919}, pages = {638-644}, doi = {http://doi.ieeecomputersociety.org/10.1109/CVPR.2004.130}, publisher = {IEEE Computer Society}, address = {Los Alamitos, CA, USA}, } # }