gpt4 book ai didi

python - 期望最大化中的奇异协方差矩阵

转载 作者:行者123 更新时间:2023-12-05 05:44:52 25 4
gpt4 key购买 nike

我试图在 python 中编写一个 EM 算法,用于对不同类型的图像进行聚类。我对EM算法的理解是这样的:

EM Algo

因此,我用 python 编写了相同的代码。这是我的代码:

import numpy as np
import sys
from scipy import stats

# μ: mean vector ∈ R^(self.m x self.n)
# Σ: covariance matrix ∈ R^(self.m x self.n x self.n)
# π: probabilities of each component

class gmm:
def __init__(self,n_components):
self.m = n_components
self.π = np.random.random((self.m,))
self.x = None
self.Σ = None
self.μ = None
self.r = None
self.n = None # length of data

@classmethod
def N(cls, x, μ, Σ):
#print(Σ)
#print('\n')
return stats.multivariate_normal(μ, Σ).pdf(x)

def E_step(self):
for i,x_i in enumerate(self.x):
den = 0
for c in range(self.m):
#print(self.Σ[c].shape)
#print(self.μ[c].shape)
#sys.exit()
den+= self.π[c]*gmm.N(x_i,self.μ[c],self.Σ[c])
#print(f'{den=}')
for c in range(self.m):
num = self.π[c]*gmm.N(x_i,self.μ[c],self.Σ[c])
self.r[i,c] = num/den
print(f'{self.r}\n')

def M_step(self):
m_c = np.sum(self.r, axis = 0)
self.π = m_c/self.m
for c in range(self.m):
s1 = 0
s2 = 0
for i in range(self.n):
s1+= self.r[i,c]*self.x[i]
s2+= self.r[i,c]*(self.x[i]-self.μ[c]).dot(self.x[i]-self.μ[c])
self.μ[c] = s1/m_c[c]
self.Σ[c] = s2/m_c[c]

def fit(self,x, iterations = 10):
self.x = x
self.n = x.shape[0]
self.r = np.empty((self.n, self.m))
self.μ = np.random.random((self.m, self.n))
Sigma = [np.random.random((self.n, self.n)) for i in range(self.m)]
Sigma = [0.5*(s + s.T)+5*np.eye(s.shape[0]) for s in Sigma] # A symmetric diagonally dominant matrix is PD
#print([np.linalg.det(s) for s in Sigma])
self.Σ = np.asarray(Sigma)
for i in range(iterations):
self.E_step()
self.M_step()

def params():
return self.π, self.μ, self.Σ

if __name__ == '__main__':
data_dim = 5 # No. of data dimensions
data = [[]]*6
data[0] = np.random.normal(0.5,2, size = (300,))
data[1] = np.random.normal(3,2, size = (300,))
data[2] = np.random.normal(-1, 0.1, size = (300,))
data[3] = np.random.normal(2,3.14159, size = (300,))
data[4] = np.random.normal(0,1, size = (300,))
data[5] = np.random.normal(3.5,5, size = (300,))

p = [0.1, 0.15, 0.22, 0.3, 0.2, 0.03]
vals = [0,1,2,3,4,5]
combined = []
for i in range(data_dim):
choice = np.random.choice(vals, p = p)
combined.append(np.random.choice(data[choice]))

combined = np.array(combined)

G = gmm(n_components = 6)
G.fit(combined)
pi, mu, sigma = G.params()
print(pi)
print(mu)
print(sigma)

问题来了。运行代码时,协方差矩阵 Σ 在一些迭代后变为奇异矩阵。具体来说,在特定迭代中,Sigma 的所有条目突然变得相同。

我曾尝试在发生这种情况时向 Σ 添加一些随机噪声,但这似乎只会延迟不可避免的事情。任何帮助/意见将不胜感激。提前致谢:)

最佳答案

为了防止协方差矩阵变得奇异,您可以沿矩阵的对角线添加任意值,即

val * np.identity(size)

因为这确保协方差矩阵将保持正定,并且具有逆矩阵。例如, sklearn 使用默认值 1e-6 进行正则化。

关于python - 期望最大化中的奇异协方差矩阵,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/71527991/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com