使用可用值集绘制正态分布曲线的问题

http://img.mukewang.com/63aaa0270001850406380463.jpg

我正在尝试绘制一组值的正态分布曲线。不幸的是,下面的代码(取自这篇文章)似乎没有在直方图上正确绘制曲线(请参考附图)。我确定我遗漏了什么或做了一些愚蠢的事情,但似乎无法弄清楚。有人可以帮忙吗?我在下面包含了我的代码——这些值原本取自数据框,但为了方便起见,我把它们放在了列表 s 中:

import numpy as np

import scipy

import pandas as pd

from scipy.stats import norm

import matplotlib.pyplot as plt

from matplotlib.mlab import normpdf

# Mean of the normal curve evaluated further down in this script.
mu = 0

# Standard deviation of the normal curve evaluated further down in this script.
sigma = 1

# Number of bins passed to the histogram call.
n_bins = 50

s = [8, 8, 4, 4, 1, 14, 0, 10, 1, 4, 21, 9, 5, 2, 7, 6, 7, 9, 7, 3, 3, 4, 7, 9, 9, 4, 10, 8, 10, 10, 7, 10, 1, 8, 7, 8, 1, 7, 4, 15, 8, 1, 1, 6, 7, 3, 8, 8, 8, 4]

# Two stacked axes sharing one x axis; only axes[1] (the second row) is drawn on
# in the lines shown here.
fig, axes = plt.subplots(nrows=2, ncols=1, sharex=True)


#histogram

# Histogram of s on the second axes; `bins` holds the bin edges returned by hist.
# NOTE(review): `normed` is the older spelling of `density`; newer Matplotlib
# releases appear to reject it -- confirm against the installed version.
n, bins, patches = axes[1].hist(s, n_bins, normed=True, alpha=.1, edgecolor='black' )

# Normal density evaluated at the bin edges.
# NOTE(review): mu and sigma are the constants assigned near the top of the
# script (0 and 1); they are not computed from s.
pdf = 1/(sigma*np.sqrt(2*np.pi))*np.exp(-(bins-mu)**2/(2*sigma**2))

# Debug output of the density values.
print(pdf)

# 50th, 25th and 75th percentiles of s.
median, q1, q3 = np.percentile(s, 50), np.percentile(s, 25), np.percentile(s, 75)


#probability density function

# Overlay the density curve on the histogram axes.
axes[1].plot(bins, pdf, color='orange', alpha=.6)


#to ensure pdf and bins line up to use fill_between.

# Bin edges lying between Q1 - 1.5*IQR and Q1.
bins_1 = bins[(bins >= q1-1.5*(q3-q1)) & (bins <= q1)] # to ensure fill starts from Q1-1.5*IQR

# Bin edges lying between Q3 and Q3 + 1.5*IQR.
bins_2 = bins[(bins <= q3+1.5*(q3-q1)) & (bins >= q3)]

# Split the density values into a first and a second half by index.
pdf_1 = pdf[:int(len(pdf)/2)]

pdf_2 = pdf[int(len(pdf)/2):]

# Keep only the density values lying between the normal pdf at the fence and at the quartile.
# NOTE(review): bins_1/bins_2 and pdf_1/pdf_2 are selected by different masks
# (x values vs. density values), so their lengths are not guaranteed to match.
pdf_1 = pdf_1[(pdf_1 >= norm(mu,sigma).pdf(q1-1.5*(q3-q1))) & (pdf_1 <= norm(mu,sigma).pdf(q1))]

pdf_2 = pdf_2[(pdf_2 >= norm(mu,sigma).pdf(q3+1.5*(q3-q1))) & (pdf_2 <= norm(mu,sigma).pdf(q3))]


#fill from Q1-1.5*IQR to Q1 and Q3 to Q3+1.5*IQR

# The two fill calls below are disabled (commented out) in the original script.
#axes[1].fill_between(bins_1, pdf_1, 0, alpha=.6, color='orange')

#axes[1].fill_between(bins_2, pdf_2, 0, alpha=.6, color='orange')



繁花不似锦
浏览 112回答 2
2回答

小唯快跑啊

您已分别将 mu 和 sigma 任意设置为 0 和 1,但您应该根据实际数据来计算它们:

data = pd.Series(s)
mu = data.mean()
sigma = data.std()

使用完整的工作示例进行更新:

import numpy as np
import scipy
import pandas as pd
from scipy.stats import norm
import matplotlib.pyplot as plt

n_bins = 50
s = [8, 8, 4, 4, 1, 14, 0, 10, 1, 4, 21, 9, 5, 2, 7, 6, 7, 9, 7, 3, 3, 4, 7, 9, 9, 4, 10, 8, 10, 10, 7, 10, 1, 8, 7, 8, 1, 7, 4, 15, 8, 1, 1, 6, 7, 3, 8, 8, 8, 4]

fig, axes = plt.subplots(nrows=2, ncols=1, sharex=True)

#histogram
n, bins, patches = axes[1].hist(s, n_bins, density=True, alpha=.1, edgecolor='black' )
data = pd.Series(s)
mu = data.mean()
sigma = data.std()
pdf = 1/(sigma*np.sqrt(2*np.pi))*np.exp(-(bins-mu)**2/(2*sigma**2))
median, q1, q3 = np.percentile(s, 50), np.percentile(s, 25), np.percentile(s, 75)

#probability density function
axes[1].plot(bins, pdf, color='orange', alpha=.6)

#fill from Q1-1.5*IQR to Q1 and Q3 to Q3+1.5*IQR
iqr = 1.5 * (q3-q1)
x1 = np.linspace(q1 - iqr, q1)
x2 = np.linspace(q3, q3 + iqr)
pdf1 = 1/(sigma*np.sqrt(2*np.pi))*np.exp(-(x1-mu)**2/(2*sigma**2))
pdf2 = 1/(sigma*np.sqrt(2*np.pi))*np.exp(-(x2-mu)**2/(2*sigma**2))
axes[1].fill_between(x1, pdf1, 0, alpha=.6, color='orange')
axes[1].fill_between(x2, pdf2, 0, alpha=.6, color='orange')

#add text to bottom graph.
axes[1].annotate("{:.1f}%".format(100*(norm(mu, sigma).cdf(q1)    -norm(mu, sigma).cdf(q1-iqr))), xy=(q1-iqr/2, 0), ha='center')
axes[1].annotate("{:.1f}%".format(100*(norm(mu, sigma).cdf(q3)    -norm(mu, sigma).cdf(q1)    )), xy=(median  , 0), ha='center')
axes[1].annotate("{:.1f}%".format(100*(norm(mu, sigma).cdf(q3+iqr)-norm(mu, sigma).cdf(q3)    )), xy=(q3+iqr/2, 0), ha='center')
axes[1].annotate('q1', xy=(q1, norm(mu, sigma).pdf(q1)), ha='center')
axes[1].annotate('q3', xy=(q3, norm(mu, sigma).pdf(q3)), ha='center')
axes[1].set_ylabel('Probability Density')

#top boxplot
axes[0].boxplot(s, 0, 'gD', vert=False)
axes[0].axvline(median, color='orange', alpha=.6, linewidth=.5)
axes[0].axis('off')

德玛西亚99

把它全部放在一个函数中:

# import warnings filter
from warnings import simplefilter
# ignore all future warnings
simplefilter(action='ignore', category=FutureWarning)

def CTD(df):
    for col in df.columns:
        n_bins = 50
        fig, axes = plt.subplots(nrows=2, ncols=1, sharex=True)
        #histogram
        n, bins, patches = axes[1].hist(boston[col], n_bins, density=True, alpha=.1, edgecolor='black' )
        #data = pd.Series(s)
        mu = boston[col].mean()
        sigma = boston[col].std()
        pdf = 1/(sigma*np.sqrt(2*np.pi))*np.exp(-(bins-mu)**2/(2*sigma**2))
        median, q1, q3 = np.percentile(boston.age, 50), np.percentile(boston[col], 25), np.percentile(boston[col], 75)
        #probability density function
        axes[1].plot(bins, pdf, color='orange', alpha=.6)
        #axes[1].figsize=(10,20)
        #fill from Q1-1.5*IQR to Q1 and Q3 to Q3+1.5*IQR
        iqr = 1.5 * (q3-q1)
        x1 = np.linspace(q1 - iqr, q1)
        x2 = np.linspace(q3, q3 + iqr)
        pdf1 = 1/(sigma*np.sqrt(2*np.pi))*np.exp(-(x1-mu)**2/(2*sigma**2))
        pdf2 = 1/(sigma*np.sqrt(2*np.pi))*np.exp(-(x2-mu)**2/(2*sigma**2))
        axes[1].fill_between(x1, pdf1, 0, alpha=.6, color='orange')
        axes[1].fill_between(x2, pdf2, 0, alpha=.6, color='orange')
        #add text to bottom graph.
        axes[1].annotate("{:.1f}%".format(100*(norm(mu, sigma).cdf(q1)    -norm(mu, sigma).cdf(q1-iqr))), xy=(q1-iqr/2, 0), ha='center')
        axes[1].annotate("{:.1f}%".format(100*(norm(mu, sigma).cdf(q3)    -norm(mu, sigma).cdf(q1)    )), xy=(median  , 0), ha='center')
        axes[1].annotate("{:.1f}%".format(100*(norm(mu, sigma).cdf(q3+iqr)-norm(mu, sigma).cdf(q3)    )), xy=(q3+iqr/2, 0), ha='center')
        axes[1].annotate('q1', xy=(q1, norm(mu, sigma).pdf(q1)), ha='center')
        axes[1].annotate('q3', xy=(q3, norm(mu, sigma).pdf(q3)), ha='center')
        #dashed lines
        plt.axvline(df[col].quantile(0),color='b', linestyle='-.')
        plt.axvline(df[col].quantile(0.25),color='g', linestyle='--')
        plt.axvline(df[col].quantile(0.50),color='g', linestyle='--')
        plt.axvline(df[col].quantile(0.75),color='b', linestyle='--')
        plt.axvline(df[col].quantile(1),color='r', linestyle='-.')
        axes[1].set_ylabel('Probability Density')
        #top boxplot
        axes[0].boxplot(df[col], 0, 'gD', vert=False)
        axes[0].axvline(median, color='orange', alpha=.6, linewidth=.5)
        axes[0].axis('off')
        plt.rcParams["figure.figsize"] = (18,10)

调用函数:

CTD(boston)

如果这对您不起作用,试试这个:

# import warnings filter
from warnings import simplefilter
# ignore all future warnings
simplefilter(action='ignore', category=FutureWarning)

def CTD(df):
    for col in df.columns:
        sns.set(rc={'figure.figsize':(24,6)})
        plt.figure()
        plt.subplot(121)
        sns.distplot(df[col])
        plt.axvline(np.mean(df[col]),color='b', linestyle='--') # Blue line for mean
        plt.axvline(np.median(df[col]),color='r', linestyle='--')# Red line for Median
        plt.subplot(122)
        sns.distplot(df[col])
        plt.axvline(df[col].quantile(0),color='b', linestyle='-.')
        plt.axvline(df[col].quantile(0.25),color='g', linestyle='--')
        plt.axvline(df[col].quantile(0.50),color='g', linestyle='--')
        plt.axvline(df[col].quantile(0.75),color='b', linestyle='--')
        plt.axvline(df[col].quantile(1),color='r', linestyle='-.')

这在具有分位数的 KDE 图上创建虚线。
打开App,查看更多内容
随时随地看视频慕课网APP

相关分类

Python