如何从具有权重的数据创建箱线图？

这里有两种方法来回答这个问题。您可能会首先想到第一种方法，但它并不适合用来计算中位数的置信区间（confidence intervals of the median）；matplotlib/cbook/__init__.py 中有如下使用样本数据进行该计算的代码。因此，第二种方法比其他任何代码都好得多，因为与任何自定义代码相比，它经过了充分的测试。

```python
def boxplot_stats(X, whis=1.5, bootstrap=None, labels=None,
                  autorange=False):
    def _bootstrap_median(data, N=5000):
        # determine 95% confidence intervals of the median
        M = len(data)
        percentiles = [2.5, 97.5]
        bs_index = np.random.randint(M, size=(N, M))
        bsData = data[bs_index]
        estimate = np.median(bsData, axis=1, overwrite_input=True)
```

第一种：

```python
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

data = {
    "Name": ['Sara', 'John', 'Mark', 'Peter', 'Kate'],
    "Count": [20, 10, 5, 2, 5],
    "Score": [2, 4, 7, 8, 7]
}
df = pd.DataFrame(data)
print(df)


def boxplot(values, freqs):
    values = np.array(values)
    freqs = np.array(freqs)
    arg_sorted = np.argsort(values)
    values = values[arg_sorted]
    freqs = freqs[arg_sorted]
    count = freqs.sum()
    fx = values * freqs
    mean = fx.sum() / count
    variance = ((freqs * values ** 2).sum() / count) - mean ** 2
    variance = count / (count - 1) * variance  # dof correction for sample variance
    std = np.sqrt(variance)
    minimum = np.min(values)
    maximum = np.max(values)
    cumcount = np.cumsum(freqs)
    print([std, variance])
    Q1 = values[np.searchsorted(cumcount, 0.25 * count)]
    Q2 = values[np.searchsorted(cumcount, 0.50 * count)]
    Q3 = values[np.searchsorted(cumcount, 0.75 * count)]
    '''
    interquartile range (IQR), also called the midspread or middle 50%, or technically
    H-spread, is a measure of statistical dispersion, being equal to the difference
    between 75th and 25th percentiles, or between upper and lower quartiles,[1][2]
    IQR = Q3 −  Q1. In other words, the IQR is the first quartile subtracted from
    the third quartile; these quartiles can be clearly seen on a box plot on the data.
    It is a trimmed estimator, defined as the 25% trimmed range, and is a commonly used
    robust measure of scale.
    '''
    IQR = Q3 - Q1
    '''
    The whiskers add 1.5 times the IQR to the 75 percentile (aka Q3) and subtract
    1.5 times the IQR from the 25 percentile (aka Q1).  The whiskers should include
    99.3% of the data if from a normal distribution.  So the 6 foot tall man from
    the example would be inside the whisker but my 6 foot 2 inch girlfriend would
    be at the top whisker or pass it.
    '''
    whishi = Q3 + 1.5 * IQR
    whislo = Q1 - 1.5 * IQR
    stats = [{
        'label': 'Scores',  # tick label for the boxplot
        'mean': mean,  # arithmetic mean value
        'iqr': Q3 - Q1,  # 5.0,
#         'cilo': 2.0,  # lower notch around the median
#         'cihi': 4.0,  # upper notch around the median
        'whishi': maximum,  # end of the upper whisker
        'whislo': minimum,  # end of the lower whisker
        'fliers': [],  # '\array([], dtype=int64)',  # outliers
        'q1': Q1,  # first quartile (25th percentile)
        'med': Q2,  # 50th percentile
        'q3': Q3  # third quartile (75th percentile)
    }]
    fs = 10  # fontsize
    _, axes = plt.subplots(nrows=1, ncols=1, figsize=(6, 6), sharey=True)
    axes.bxp(stats)
    axes.set_title('Default', fontsize=fs)
    plt.show()


boxplot(df['Score'], df['Count'])
```

第二种（注意：下面的代码使用了 cbook，但没有导入它；运行前需要先执行 `from matplotlib import cbook`，否则会出现 NameError）：

```python
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

data = {
    "Name": ['Sara', 'John', 'Mark', 'Peter', 'Kate'],
    "Count": [20, 10, 5, 2, 5],
    "Score": [2, 4, 7, 8, 7]
}
df = pd.DataFrame(data)
print(df)
labels = ['Scores']
data = df['Score'].repeat(df['Count']).tolist()
# compute the boxplot stats
stats = cbook.boxplot_stats(data, labels=labels, bootstrap=10000)
print(['stats :', stats])
fs = 10  # fontsize
fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(6, 6), sharey=True)
axes.bxp(stats)
axes.set_title('Boxplot', fontsize=fs)
plt.show()
```

如何从具有权重的数据创建箱线图？

2回答