% Conference paper (ICPR 2004, proceedings volume 2, pages 483--486) on
% sensitivity-based pruning of bag-of-words vocabularies for text classification.
% NOTE(review): `address` holds "Cambridge UK", which looks like the conference
% venue rather than a publisher location -- confirm before relying on it as such.
@inproceedings{IMM2004-02893,
  author    = {Madsen, R. E. and Sigurdsson, S. and Hansen, L. K.},
  title     = {Enhanced Context Recognition by Sensitivity Pruned Vocabularies},
  booktitle = {Proceedings of 17th International Conference on Pattern Recognition ({ICPR} 2004)},
  volume    = {2},
  pages     = {483--486},
  address   = {Cambridge {UK}},
  year      = {2004},
  month     = aug,
  keywords  = {sensitivity, neural networks, text, classification, dimensionality},
  url       = {http://www2.compute.dtu.dk/pubdb/pubs/2893-full.html},
  abstract  = {Language independent `bag-of-words' representations are surprisingly effective for text classification. The generic {BOW} approach is based on a high-dimensional vocabulary which may reduce the generalization performance of subsequent classifiers, e.g., based on ill-posed principal component transformations. In this communication our aim is to study the effect of sensitivity based pruning of the bag-of-words representation. We consider neural network based sensitivity maps for determination of term relevancy, when pruning the vocabularies. With reduced vocabularies documents are classified using a latent semantic indexing representation and a probabilistic neural network classifier. Pruning the vocabularies to approximately 20\% of the original size, we find consistent context recognition enhancement for two mid size data-sets for a range of training set sizes. We also study the applicability of the sensitivity measure for automated keyword generation.},
}