% Conference paper (European Signal Processing Conference, 2010) on using
% binary masks as feature vectors for isolated speech recognition.
% The url field points to the DTU Compute publication database record 6100.
@inproceedings{IMM2010-06100,
  author    = {Karadogan, S. G. and Larsen, J. and Pedersen, M. S. and Boldt, J. B.},
  title     = {Robust Isolated Speech Recognition Using Binary Masks},
  booktitle = {European Signal Processing Conference},
  year      = {2010},
  month     = aug,
  keywords  = {Binary masks, speech recognition},
  url       = {http://www2.compute.dtu.dk/pubdb/pubs/6100-full.html},
  abstract  = {In this paper, we represent a new approach for robust speaker independent {ASR} using binary masks as feature vectors. This method is evaluated on an isolated digit database, {TIDIGIT} in three noisy environments (car, bottle and cafe noise types taken from the {DRCD} Sound Effects Library). Discrete Hidden Markov Models are used for the recognition and the observation vectors are quantized with the {K-}means algorithm using a Hamming distance. It is found that a recognition rate as high as 92\% for clean speech is achievable using Ideal Binary Masks (IBM) where we assume prior target and noise information is available. We propose that using a Target Binary Mask (TBM), where only prior target information is needed, performs as good as using IBMs. We also propose a {TBM} estimation method based on target sound estimation using non-negative sparse coding (NNSC). The recognition results for TBMs with and without the estimation method for noisy conditions are evaluated and compared with those of using Mel Frequency Cepstral Coefficients (MFCC). It is observed that binary mask feature vectors are robust to noisy conditions.},
}