% Master's thesis record, citation key IMM2012-06351.
% - "type" is left out on purpose so the bibliography style supplies its
%   default thesis label.
% - "contact" is a non-standard field that standard styles ignore; it keeps
%   the department e-mail address out of the printed school name.
% - "url" is the address given in the original record; it looks like a
%   general department page rather than a link to the thesis (to be confirmed).
@mastersthesis{IMM2012-06351,
  author   = {Troelsg{\aa}rd, R.},
  title    = {Author and Topic Modelling in Text Data},
  school   = {Technical University of Denmark, {DTU} Informatics},
  address  = {Asmussens Alle, Building 305, DK-2800 Kgs. Lyngby, Denmark},
  year     = {2012},
  note     = {Supervised by Professor Lars Kai Hansen, lkh@imm.dtu.dk, {DTU} Informatics},
  url      = {http://www.imm.dtu.dk/English.aspx},
  contact  = {E-mail: reception@imm.dtu.dk},
  abstract = {This thesis deals with probabilistic modelling of authors, documents, and topics in textual data. The focus is on the Latent Dirichlet Allocation (LDA) model and the Author-Topic (AT) model where Gibbs sampling is used for inferring model parameters from data. Furthermore, a method for optimising hyper parameters in an {ML-II} setting is described. Model properties are discussed in connection with applications of the models which include detection of unlikely documents among scientific papers from the {NIPS} conferences using document perplexity, and the problem of link prediction in the online social network Twitter for which the results are reported as Area Under the {ROC} curve (AUC) and compared to well known graph-based methods.},
}