LDA说的比较利索参考:https://segmentfault.com/a/1190000012215533
# -*- coding: utf-8 -*- ''' Created on 2018年5月16日 @author: user p:输入的概率分布,离散情况采用元素为概率值的数组表示 N:认为迭代N次马尔可夫链收敛 Nlmax:马尔可夫链收敛后又取的服从p分布的样本数 isMH:是否采用MH算法,默认为True ''' from __future__ import division import matplotlib.pyplot as plt import numpy as np from array import array def mcmc(p,N=10000,Nlmax=10000,isMH=True): A = np.array([p for y in range(len(p))], dtype=np.float64) #第一步:构造转移概率矩阵 X0 = np.random.randint(len(p)) count = 0 samplecount = 0 L = array("d",[X0]) l = array("d") while True: X = int(L[samplecount])#第二步:初始化x0 cur = np.argmax(np.random.multinomial(1,A[X]))#第三步:采样候选样本 count += 1 if isMH: a = (p[cur]*A[cur][X])/(p[X]*A[X][cur])#第四步:计算是否满足马氏平稳条件 alpha = min(a,1) else: alpha = p[cur]*A[cur][X] u = np.random.uniform(0,1) #第五步:生成阈值 if u<alpha:#第六步:是否接受样本 samplecount += 1 L.append(cur) if count>N: l.append(cur) if len(l)>=Nlmax: break else: continue La = np.frombuffer(L) la = np.frombuffer(l) return La,la def count(q,n): L = array("d") l1 = array("d") l2 = array("d") for e in q: L.append(e) for e in range(n): l1.append(L.count(e)) for e in l1: l2.append(e/sum(l1)) return l1,l2 if __name__ == '__main__': p = np.array([0.6,0.3,0.1]) a = mcmc(p)[1] l1 = ['state%d'% x for x in range(len(p))] plt.pie(count(a,len(p))[0],labels=l1,labeldistance=0.3,autopct='%1.2f%%') plt.title("sampling") plt.show()