如何在 python 中实现 EM-GMM？

在下面的代码中：

# Expectation step
likelihood = PDF(data, means, np.sqrt(variances))

为什么要对 variances 取平方根（sqrt）？PDF 函数接受的是方差，所以这里应该是 PDF(data, means, variances)。

另一个问题：

# Maximization step
b = likelihood * weights # shape=(k, n)
b /= np.sum(b, axis=1)[:, np.newaxis] + eps

上面第二行应该是：

b /= np.sum(b, axis=0)[:, np.newaxis] + eps

同样，在 variances 的初始化中：

variances = np.random.random_sample(size=k)[:, np.newaxis] # shape=(k, 1)

为什么要随机初始化方差？我们已经有 data 和 means，为什么不像下面这样根据当前估计来计算方差？

vars = np.expand_dims(np.mean(np.square(data - means), axis=1), -1)

做了这些更改之后，这是我的实现：

import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
plt.style.use('seaborn')
eps=1e-8

def pdf(data, means, vars):
    denom = np.sqrt(2 * np.pi * vars) + eps
    numer = np.exp(-0.5 * np.square(data - means) / (vars + eps))
    return numer /denom

def em_gmm(data, k, n_iter, init_strategy='k_means'):
    weights = np.ones((k, 1), dtype=np.float32) / k
    if init_strategy == 'k_means':
        from sklearn.cluster import KMeans
        km = KMeans(k).fit(data[:, None])
        means = km.cluster_centers_
    else:
        means = np.random.choice(data, k)[:, np.newaxis]
    data = np.repeat(data[np.newaxis, :], k, 0)
    vars = np.expand_dims(np.mean(np.square(data - means), axis=1), -1)
    for step in range(n_iter):
        p = pdf(data, means, vars)
        b = p * weights
        denom = np.expand_dims(np.sum(b, axis=0), 0) + eps
        b = b / denom
        means_n = np.sum(b * data, axis=1)
        means_d = np.sum(b, axis=1) + eps
        means = np.expand_dims(means_n / means_d, -1)
        vars = np.sum(b * np.square(data - means), axis=1) / means_d
        vars = np.expand_dims(vars, -1)
        weights = np.expand_dims(np.mean(b, axis=1), -1)
    return means, vars

def main():
    s = np.array([25.31, 24.31, 24.12, 43.46, 41.48666667,
                  41.48666667, 37.54, 41.175, 44.81, 44.44571429,
                  44.44571429, 44.44571429, 44.44571429, 44.44571429, 44.44571429,
                  44.44571429, 44.44571429, 44.44571429, 44.44571429, 44.44571429,
                  44.44571429, 44.44571429, 39.71, 26.69, 34.15,
                  24.94, 24.75, 24.56, 24.38, 35.25,
                  44.62, 44.94, 44.815, 44.69, 42.31,
                  40.81, 44.38, 44.56, 44.44, 44.25,
                  43.66666667, 43.66666667, 43.66666667, 43.66666667, 43.66666667,
                  40.75, 32.31, 36.08, 30.135, 24.19])
    k = 3
    n_iter = 100
    means, vars = em_gmm(s, k, n_iter)
    y = 0
    colors = ['green', 'red', 'blue', 'yellow']
    bins = np.linspace(np.min(s) - 2, np.max(s) + 2, 100)
    plt.figure(figsize=(10, 7))
    plt.xlabel('$x$')
    plt.ylabel('pdf')
    sns.scatterplot(s, [0.0] * len(s), color='navy', s=40, marker=2, label='Series data')
    for i, (m, v) in enumerate(zip(means, vars)):
        sns.lineplot(bins, pdf(bins, m, v), color=colors[i], label=f'Cluster {i + 1}')
    plt.legend()
    plt.plot()
    plt.show()
    pass

这是我的结果。

如何在 python 中实现 EM-GMM？

2回答