侃侃无极
试试这个:

```python
import pandas as pd
import numpy as np

def sales_rank(x, df):
    df_ns = df.set_index('id')
    df_ns = df_ns.loc[x.neighbors, 'sales']
    df_ns.sort_values(ascending=False, inplace=True)
    df_ns = df_ns.reset_index()
    return df_ns[df_ns.id == x.id].index[0]

df = pd.DataFrame(data={'id': range(5),
                        'weight': [20, 21, 23, 43, 22],
                        'sales':[200, 100, 140, 100, 100]})

df['neighbors'] = df.weight.apply(lambda x: df.id[np.isclose(df.weight.values, x, rtol=0.10)].values)
df['sales_rank_in_neighborhood'] = df.apply(lambda x: sales_rank(x, df) , axis=1)
df['top_seller'] = df.apply(lambda x: x.sales_rank_in_neighborhood < len(x.neighbors)//2, axis=1)
print(df)
```

输出:

```
   id  weight  sales     neighbors  sales_rank_in_neighborhood  top_seller
0   0      20    200     [0, 1, 4]                           0        True
1   1      21    100  [0, 1, 2, 4]                           3       False
2   2      23    140     [1, 2, 4]                           0        True
3   3      43    100           [3]                           0       False
4   4      22    100  [0, 1, 2, 4]                           2       False
```

请注意,只含单个元素的邻域中不会有畅销商品。您可以根据自己的需要调整规则。希望这对您有帮助!

编辑:我添加了一个按组处理的解决方案,其中包含多条用于定义邻域的规则,以及修正后的销售排名实现:

```python
import pandas as pd
import numpy as np

def ns(x, df):
    weight_rule = np.isclose(df.weight.values, x.weight, rtol=0.10)
    gear_rule = df.gear == x.gear
    type_rule = df.type == x.type
    return df.id[np.logical_and.reduce((weight_rule, gear_rule, type_rule))].values

def sales_rank(x, df):
    df_ns = df.set_index('id')
    df_ns = df_ns.loc[x.neighbors, 'sales']
    df_ns.sort_values(ascending=False, inplace=True)
    df_ns = df_ns.reset_index()
    return df_ns[df_ns.id == x.id].index[0]

df = pd.DataFrame(data={'store_id': [0, 1, 0, 1, 0],
                        'id': range(5),
                        'weight': [20, 21, 23, 43, 22],
                        'gear': [3, 3, 3, 7, 3],
                        'type':['mountain', 'mountain', 'mountain', 'bmx', 'mountain'],
                        'sales':[200, 100, 140, 100, 100]})

# Columns for results
df['neighbors'] = ''
df['sales_rank_in_neighborhood'] = ''
df['top_seller'] = ''

groups = df.groupby('store_id')
for _, g in groups:
    df_temp = df.loc[g.index, :]
    df_temp.neighbors = df_temp.apply(lambda x: ns(x, df_temp), axis=1)
    df_temp.sales_rank_in_neighborhood = df_temp.apply(lambda x: sales_rank(x, df_temp), axis=1)
    df_temp.top_seller = df_temp.apply(lambda x: x.sales_rank_in_neighborhood < len(x.neighbors)//2, axis=1)
    df.loc[g.index, :] = df_temp

print(df)
```

输出:

```
   store_id  id  weight  gear      type  sales  neighbors  sales_rank_in_neighborhood  top_seller
0         0   0      20     3  mountain    200     [0, 4]                           0        True
1         1   1      21     3  mountain    100        [1]                           0       False
2         0   2      23     3  mountain    140     [2, 4]                           0        True
3         1   3      43     7       bmx    100        [3]                           0       False
4         0   4      22     3  mountain    100  [0, 2, 4]                           2       False
```

我想应该有办法避免对各个组进行循环遍历,但这种写法似乎已经可以解决问题。