% Conference paper (Interspeech, September 2006) on single-channel speech
% separation using sparse non-negative matrix factorization.
% The citation key is kept unchanged so existing citations keep resolving.
% Empty placeholder fields (volume, series, editor, publisher, organization,
% address) were dropped; re-add them only with real values.
% NOTE(review): author given names are stored as initials only, as in the
% original record -- expand to full names if they can be confirmed.
@inproceedings{IMM2006-04511,
  author    = {Schmidt, M. N. and Olsson, R. K.},
  title     = {Single-Channel Speech Separation using Sparse Non-Negative Matrix Factorization},
  booktitle = {Interspeech},
  year      = {2006},
  month     = sep,
  url       = {http://www2.compute.dtu.dk/pubdb/pubs/4511-full.html},
  abstract  = {We apply machine learning techniques to the problem of separating multiple speech sources from a single microphone recording. The method of choice is a sparse non-negative matrix factorization algorithm, which in an unsupervised manner can learn sparse representations of the data. This is applied to the learning of personalized dictionaries from a speech corpus, which in turn are used to separate the audio stream into its components. We show that computational savings can be achieved by segmenting the training data on a phoneme level. To split the data, a conventional speech recognizer is used. The performance of the unsupervised and supervised adaptation schemes result in significant improvements in terms of the target-to-masker ratio.},
}