% Chapter by Gao and Wen (2015), pp. 45--56, in the Springer series
% "Lecture Notes in Educational Technology".
% - The 13-digit value 9783662444467 was exported into both "number" and
%   "edition"; it is an ISBN (valid ISBN-13 check digit), so it is stored
%   once in "isbn" and the two misused fields are dropped.
% - "publisher" is taken from the copyright line kept in "note".
% NOTE(review): "booktitle" repeats the series name; the title of the
% containing volume is presumably missing from the export -- confirm
% against the DOI landing page and replace "booktitle" if so.
@inbook{c0e3c7c7328f4cf1bd1d8d30330c3742,
title = "Semantic similarity-enhanced topic models for document analysis",
abstract = "In e-learning environment, more and more larger-scale text resources are generated by teaching–learning interactions. Finding latent topics in these resources can help us understand the teaching contents and the learners{\textquoteright} interests and focuses. Latent Dirichlet allocation (LDA) has been widely used in many areas to extract the latent topics in a text corpus. However, the extracted topics cannot be understood by the end user. Adding more auxiliary information to LDA to guide the process of topic extraction is a good way to improve the interpretability of topic modeling. Co-occurrence information in corpus is such information, but it is not sufficient yet to measure the similarity between word pairs, especially in sparse document space. To deal with this problem, we propose a new semantic similarity-enhanced topic model in this paper. In this model, we use not only co-occurrence information but also the semantic similarity based on WordNet as auxiliary information. Those two kinds of information are combined into a topic-word component though generative P{\'o}lya urn model. The distribution of documents over the extracted topics obtained by the new model can be inputted to the classifier. The accuracy of extracting topics can improve the performance of the classifier. Our experiments on newsgroup corpus show that the semantic similarity-enhanced topic model performs better than the topic models with only single information separately.",
keywords = "Generative P{\'o}lya urn model, Gibbs sampling, LDA, Semantic similarity, Topic modeling, WordNet",
author = "Yan Gao and Dunwei Wen",
note = "Publisher Copyright: {\textcopyright} Springer-Verlag Berlin Heidelberg 2015.",
year = "2015",
doi = "10.1007/978-3-662-44447-4_3",
language = "English",
isbn = "9783662444467",
series = "Lecture Notes in Educational Technology",
publisher = "Springer",
pages = "45--56",
booktitle = "Lecture Notes in Educational Technology",
}