% Conference paper; the citation key is kept as exported so existing \cite commands still resolve.
@inproceedings{IMM2004-02894,
    author    = {Madsen, R. E. and Larsen, J. and Hansen, L. K.},
    title     = {Part-of-Speech Enhanced Context Recognition},
    booktitle = {Proceedings of {IEEE} Workshop on Machine Learning for Signal Processing {XIV}},
    editor    = {Barros, A. K. and Principe, J. and Larsen, J. and Adali, T. and Douglas, S.},
    pages     = {635--644},
    publisher = {{IEEE} Press},
    address   = {Piscataway, New Jersey},
    year      = {2004},
    month     = sep,
    isbn      = {0-7803-8609-4},
    url       = {http://www2.compute.dtu.dk/pubdb/pubs/2894-full.html},
    keywords  = {text mining, latent space, context recognition},
    abstract  = {Language independent `bag-of-words' representations are
                 surprisingly effective for text classification. In this
                 communication our aim is to elucidate the synergy between
                 language independent features and simple language model
                 features. We consider term tag features estimated by a
                 so-called part-of-speech tagger. The feature sets are
                 combined in an early binding design with an optimized
                 binding coefficient that allows weighting of the relative
                 variance contributions of the participating feature sets.
                 With the combined features documents are classified using
                 a latent semantic indexing representation and a
                 probabilistic neural network classifier. Three medium size
                 data-sets are analyzed and we find consistent synergy
                 between the term and natural language features in all
                 three sets for a range of training set sizes. The most
                 significant enhancement is found for small text databases
                 where high recognition rates are possible.},
}