% Madsen, Larsen and Hansen (2004): paper in the proceedings of the
% IEEE Workshop on Machine Learning for Signal Processing XIV.
% Citation key is kept as exported so existing citations keep resolving.
@inproceedings{IMM2004-02894,
  author    = {Madsen, R. E. and Larsen, J. and Hansen, L. K.},
  title     = {Part-of-Speech Enhanced Context Recognition},
  booktitle = {Proceedings of {IEEE} Workshop on Machine Learning for Signal Processing {XIV}},
  editor    = {Barros, A. K. and Principe, J. and Larsen, J. and Adali, T. and Douglas, S.},
  pages     = {635--644},
  publisher = {{IEEE} Press},
  address   = {Piscataway, New Jersey},
  year      = {2004},
  month     = sep,
  isbn      = {0-7803-8609-4},
  url       = {http://www2.compute.dtu.dk/pubdb/pubs/2894-full.html},
  keywords  = {text mining, latent space, context recognition},
  abstract  = {Language independent `bag-of-words' representations are
               surprisingly effective for text classification. In this
               communication our aim is to elucidate the synergy between
               language independent features and simple language model
               features. We consider term tag features estimated by a
               so-called part-of-speech tagger. The feature sets are
               combined in an early binding design with an optimized
               binding coefficient that allows weighting of the relative
               variance contributions of the participating feature sets.
               With the combined features documents are classified using a
               latent semantic indexing representation and a probabilistic
               neural network classifier. Three medium size data-sets are
               analyzed and we find consistent synergy between the term and
               natural language features in all three sets for a range of
               training set sizes. The most significant enhancement is
               found for small text databases where high recognition rates
               are possible.},
}