% Conference paper on approximating the Dirichlet compound multinomial (DCM)
% distribution; the citation key is kept as-is so existing \cite commands resolve.
@inproceedings{IMM2005-03997,
  author    = {Madsen, R. E. and Kauchak, D. and Elkan, C.},
  title     = {Approximating the {Dirichlet} Compound Multinomial Distribution},
  booktitle = {Neural Information Processing Systems},
  year      = {2005},
  month     = dec,
  keywords  = {DCM, Dirichlet, Polya, Text mining},
  url       = {http://www2.compute.dtu.dk/pubdb/pubs/3997-full.html},
  abstract  = {We investigate the Dirichlet compound multinomial (DCM), which has recently been shown to be a good model for word burstiness in documents. We provide a number of conceptual explanations that account for these recent results. We then derive an exponential family approximation of the {DCM} that is substantially faster to train, while still producing similar probabilities and classification performance. We also investigate Fisher kernels using the {DCM} model for generating distributionally based similarity scores. Initial experiments show promise for this type of similarity method.},
}