PLSA (Probabilistic Latent Semantic Analysis) rests on a more solid mathematical foundation than LSA.
Aspect Model
The core idea of PLSA is the aspect model. Each topic is treated as a latent variable z that links words and documents into a single statistical model. Every word of every document is assumed to be generated as follows:
1. Select a document d_i with probability P(d_i);
2. Select a latent topic z_k with probability P(z_k|d_i);
3. Generate a word w_j with probability P(w_j|z_k).
The documents, words, and topics therefore range over the following sets:
d\in D=\{d_1,\cdots,d_N\},\quad w\in W=\{w_1,\cdots,w_M\},\quad z\in Z=\{z_1,\cdots,z_K\}
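To make the generation process concrete, here is a minimal NumPy sketch that samples (document, word) pairs following the three steps above. The probability tables are made-up toy values, not fitted parameters, and all names are assumptions of this sketch:

```python
# Toy sketch of the aspect-model generation process (illustrative only).
import numpy as np

rng = np.random.default_rng(0)

N, K, M = 3, 2, 4                                  # documents, topics, vocabulary size
p_d = np.full(N, 1.0 / N)                          # P(d_i): document selection probabilities
p_z_given_d = rng.dirichlet(np.ones(K), size=N)    # P(z_k|d_i), each row sums to 1
p_w_given_z = rng.dirichlet(np.ones(M), size=K)    # P(w_j|z_k), each row sums to 1

def generate_word():
    """Generate one (document, word) pair following the three steps."""
    d = rng.choice(N, p=p_d)                       # 1. pick a document d_i
    z = rng.choice(K, p=p_z_given_d[d])            # 2. pick a latent topic z_k
    w = rng.choice(M, p=p_w_given_z[z])            # 3. emit a word w_j
    return d, w

print([generate_word() for _ in range(5)])
```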
The model can then be written as the joint probability of a word and a document:
P(d_i,w_j)=P(d_i)P(w_j|d_i) \tag{1}
P(w_j|d_i)=\sum\limits_{k=1}^K P(w_j|z_k)P(z_k|d_i) \tag{2}
Applying Bayes' rule, (1) can be rewritten in a symmetric form:
\begin{align} P(d_i,w_j) &= P(d_i)P(w_j|d_i) \\ &= P(d_i)\sum\limits_{k=1}^K P(w_j|z_k)P(z_k|d_i) \\ &= \sum\limits_{k=1}^K P(w_j|z_k)P(z_k|d_i)P(d_i) \\ &= \sum\limits_{k=1}^K P(z_k)P(w_j|z_k)P(d_i|z_k) \tag{3} \end{align}
where the last step uses P(z_k|d_i)P(d_i)=P(z_k,d_i)=P(z_k)P(d_i|z_k).
The parameters of the PLSA model are computed by maximum likelihood estimation, maximizing the log-likelihood:
\mathcal{L}=\sum\limits_{i=1}^N\sum\limits_{j=1}^M n(d_i, w_j)\log{P}(d_i, w_j) \tag{4}
where n(d_i,w_j) is the number of times word w_j occurs in document d_i.
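As an illustration, the log-likelihood (4) can be evaluated with a few array operations. The array names (counts for n(d_i,w_j), p_d, p_z_given_d, p_w_given_z) are assumptions of this sketch:

```python
# Illustrative sketch: evaluate the log-likelihood (4) from parameter tables.
import numpy as np

def log_likelihood(counts, p_d, p_z_given_d, p_w_given_z):
    # P(w_j|d_i) = sum_k P(w_j|z_k) P(z_k|d_i), equation (2)
    p_w_given_d = p_z_given_d @ p_w_given_z        # (N, K) @ (K, M) -> (N, M)
    # P(d_i, w_j) = P(d_i) P(w_j|d_i), equation (1)
    p_dw = p_d[:, None] * p_w_given_d
    # small epsilon guards against log(0) for zero-probability pairs
    return float((counts * np.log(p_dw + 1e-12)).sum())
```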
Model Fitting
Because of the latent variable z, the likelihood cannot be maximized in closed form, so the EM algorithm is used.
E-step
Given the current parameters, compute the posterior of the latent topic for every (document, word) pair, using the conditional independence of d_i and w_j given z_k:
\begin{align} P(z_k|d_i,w_j) &= \frac{P(z_k,d_i,w_j)}{P(d_i,w_j)} \\ &= \frac{P(d_i,w_j|z_k)P(z_k)}{P(d_i)P(w_j|d_i)} \\ &= \frac{P(d_i|z_k)P(w_j|z_k)P(z_k)}{P(d_i)P(w_j|d_i)} \\ &= \frac{P(w_j|z_k)P(z_k|d_i)P(d_i)}{P(d_i)\sum\limits_{l=1}^K P(w_j|z_l)P(z_l|d_i)} \\ &= \frac{P(w_j|z_k)P(z_k|d_i)}{\sum\limits_{l=1}^K P(w_j|z_l)P(z_l|d_i)} \tag{5} \end{align}
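A vectorized sketch of the E-step: under the same assumed array layout as above, the posterior (5) for all (i, j, k) triples is a single broadcasted product followed by a normalization over topics:

```python
# Illustrative sketch of the E-step (5). p_z_given_d is (N, K) and
# p_w_given_z is (K, M); the result is the (N, M, K) posterior table.
import numpy as np

def e_step(p_z_given_d, p_w_given_z):
    # numerator of (5): P(w_j|z_k) P(z_k|d_i) for every (i, j, k)
    joint = p_z_given_d[:, None, :] * p_w_given_z.T[None, :, :]
    # denominator of (5): normalize over the topic axis
    return joint / joint.sum(axis=2, keepdims=True)
```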
M-step
Using the posterior P(z_k|d_i,w_j) obtained in the E-step, maximize the likelihood and update the parameters P(w_j|z_k) and P(z_k|d_i). Expanding (4):
\begin{align} \mathcal{L} &= \sum\limits_{i=1}^N\sum\limits_{j=1}^M n(d_i,w_j)\log P(d_i,w_j) \\ &= \sum\limits_{i=1}^N\sum\limits_{j=1}^M n(d_i,w_j)\log\Big[P(d_i)\sum\limits_{k=1}^K P(w_j|z_k)P(z_k|d_i)\Big] \\ &= \sum\limits_{i=1}^N n(d_i)\log P(d_i) + \sum\limits_{i=1}^N\sum\limits_{j=1}^M n(d_i,w_j)\log\sum\limits_{k=1}^K P(w_j|z_k)P(z_k|d_i) \tag{6} \end{align}
where n(d_i)=\sum\limits_{j=1}^M n(d_i,w_j). The first term does not depend on the parameters P(w_j|z_k) and P(z_k|d_i), so maximizing \mathcal{L} is equivalent to maximizing the second term:
\max\mathcal{L}\Leftrightarrow\max\sum\limits_{i=1}^N\sum\limits_{j=1}^M n(d_i,w_j)\log\sum\limits_{k=1}^K P(w_j|z_k)P(z_k|d_i) \tag{7}
Because (7) contains a logarithm of a sum, EM instead maximizes the expectation of the complete-data log-likelihood \mathcal{L}_c, taken with respect to the posterior (5) computed in the E-step:
E(\mathcal{L}_c)=\sum\limits_{i=1}^N\sum\limits_{j=1}^M n(d_i,w_j)\sum\limits_{k=1}^K P(z_k|d_i,w_j)\log\big[P(w_j|z_k)P(z_k|d_i)\big] \tag{8}
The parameters are subject to the normalization constraints:
\sum\limits_{j=1}^M P(w_j|z_k)=1, \quad k=1,\cdots,K \\ \sum\limits_{k=1}^K P(z_k|d_i)=1, \quad i=1,\cdots,N \tag{9}
This is therefore a constrained maximization problem. Introducing Lagrange multipliers \tau_k and \rho_i gives the Lagrangian:
\begin{align} H = \sum\limits_{i=1}^N \sum\limits_{j=1}^M n(d_i,w_j) \sum\limits_{k=1}^K P(z_k|d_i,w_j)\log\big[P(w_j|z_k)P(z_k|d_i)\big] + \sum\limits_{k=1}^K \tau_{k}\Big(1-\sum\limits_{j=1}^M P(w_j|z_k)\Big) + \sum\limits_{i=1}^N \rho_{i}\Big(1-\sum\limits_{k=1}^K P(z_k|d_i)\Big) \tag{10} \end{align}
The problem becomes \max H. Setting the partial derivative of H with respect to each parameter to zero gives:
\frac{\partial H}{\partial P(w_j|z_k)}=\sum\limits_{i=1}^N n(d_i,w_j)P(z_k|d_i,w_j)\frac{1}{P(w_j|z_k)}-\tau_k=0, \quad j=1,\cdots,M;\ k=1,\cdots,K
\frac{\partial H}{\partial P(z_k|d_i)}=\sum\limits_{j=1}^M n(d_i,w_j)P(z_k|d_i,w_j)\frac{1}{P(z_k|d_i)}-\rho_i=0, \quad i=1,\cdots,N;\ k=1,\cdots,K
Solving for the parameters:
P(w_j|z_k)=\frac{\sum\limits_{i=1}^N n(d_i,w_j)P(z_k|d_i,w_j)}{\tau_k}, \qquad P(z_k|d_i)=\frac{\sum\limits_{j=1}^M n(d_i,w_j)P(z_k|d_i,w_j)}{\rho_i} \tag{11}
Substituting (11) back into the constraints (9) determines the multipliers:
\tau_k=\sum\limits_{j=1}^M\sum\limits_{i=1}^N n(d_i,w_j)P(z_k|d_i,w_j), \quad k=1,\cdots,K
\rho_i=\sum\limits_{k=1}^K\sum\limits_{j=1}^M n(d_i,w_j)P(z_k|d_i,w_j)=n(d_i), \quad i=1,\cdots,N \tag{12}
which yields the M-step updates:
P(w_j|z_k)=\frac{\sum\limits_{i=1}^N n(d_i,w_j)P(z_k|d_i,w_j)}{\sum\limits_{m=1}^M\sum\limits_{i=1}^N n(d_i,w_m)P(z_k|d_i,w_m)}, \qquad P(z_k|d_i)=\frac{\sum\limits_{j=1}^M n(d_i,w_j)P(z_k|d_i,w_j)}{n(d_i)}
The E-step and M-step are iterated until convergence.
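Putting both steps together, a compact illustrative NumPy implementation of the whole EM iteration might look as follows (all names are assumptions of this sketch; counts is the N x M matrix n(d_i,w_j)):

```python
# Compact sketch of PLSA fitting by EM, following the updates derived above.
import numpy as np

def plsa_em(counts, K, n_iter=100, seed=0):
    rng = np.random.default_rng(seed)
    N, M = counts.shape
    p_z_given_d = rng.dirichlet(np.ones(K), size=N)   # P(z_k|d_i), (N, K)
    p_w_given_z = rng.dirichlet(np.ones(M), size=K)   # P(w_j|z_k), (K, M)
    for _ in range(n_iter):
        # E-step: posterior P(z_k|d_i,w_j), equation (5), shape (N, M, K)
        post = p_z_given_d[:, None, :] * p_w_given_z.T[None, :, :]
        post /= post.sum(axis=2, keepdims=True)
        # weighted counts n(d_i,w_j) P(z_k|d_i,w_j)
        weighted = counts[:, :, None] * post
        # M-step: normalize the weighted counts per the update formulas
        p_w_given_z = weighted.sum(axis=0).T               # (K, M)
        p_w_given_z /= p_w_given_z.sum(axis=1, keepdims=True)
        p_z_given_d = weighted.sum(axis=1)                 # (N, K)
        p_z_given_d /= p_z_given_d.sum(axis=1, keepdims=True)
    return p_z_given_d, p_w_given_z
```

On a toy counts matrix this converges within a few dozen iterations. Note the memory cost of the dense (N, M, K) posterior; practical implementations instead loop over the nonzero entries of the sparse count matrix.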
Drawbacks of PLSA
PLSA is not a generative model. It only yields a low-dimensional topic vector, composed of P(z_k|d_i), k=1,\cdots,K, for the documents it was trained on; for a new document there is no way to obtain a topic vector.
Relation Between PLSA and LSA
Using the symmetric parameterization in the last line of (3), define
\hat{T}=\big(P(d_i|z_k)\big)_{i,k}, \quad \hat{S}=\mathrm{diag}\big(P(z_k)\big)_k, \quad \hat{D}=\big(P(w_j|z_k)\big)_{j,k}
Then the matrix of joint probabilities P=\big(P(d_i,w_j)\big)_{i,j} factorizes as
P=\hat{T}\hat{S}\hat{D}^T
which mirrors the SVD X=TSD^T used in LSA, with the singular-value matrix replaced by the diagonal matrix of topic probabilities.
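For illustration, assembling this factorization from fitted parameters is direct. Here p_d_given_z (N x K) and p_z (K,) are assumed inputs from the symmetric parameterization in (3) (they are not produced by the asymmetric fitting sketch above), and p_w_given_z is the K x M table P(w_j|z_k):

```python
# Illustrative sketch: build P = T S D^T from fitted PLSA parameters.
import numpy as np

def joint_matrix(p_d_given_z, p_z, p_w_given_z):
    T_hat = p_d_given_z             # (N, K): T_hat[i, k] = P(d_i|z_k)
    S_hat = np.diag(p_z)            # (K, K): diagonal of topic priors P(z_k)
    D_hat = p_w_given_z.T           # (M, K): D_hat[j, k] = P(w_j|z_k)
    return T_hat @ S_hat @ D_hat.T  # (N, M) matrix of P(d_i, w_j)
```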