慕雪6442864
可以使用如下代码:

a, b = [],[]
for n1, n2, w1, w2 in zip(df['Name_prev'], df['Name_now'],
                          df['Weight_prev'], df['Weight_now']):
    #get intersection of lists
    n = [val for val in n1 if val in n2]
    #get indices by enumerate and select weights
    w3 = [w1[i] for i, val in enumerate(n1) if val in n2]
    w4 = [w2[i] for i, val in enumerate(n2) if val in n1]
    #divide each value in list
    w = [i/j for i, j in zip(w3, w4)]
    a.append(n)
    b.append(w)

df = df.assign(name=a, weight=b)
print (df)
   Id       Name_prev           Weight_prev     Name_now    Weight_now  \
0   1    [1, 3, 4, 5]      [10, 34, 67, 37]    [1, 3, 5]  [45, 76, 12]
1   2  [10, 3, 40, 5]  [100, 134, 627, 347]  [10, 40, 5]  [34, 56, 78]
2   3  [1, 30, 4, 50]      [11, 22, 45, 67]  [1, 30, 50]  [12, 45, 78]
3   4    [1, 7, 8, 9]      [32, 54, 76, 98]    [7, 8, 9]  [34, 12, 32]

          name                                             weight
0    [1, 3, 5]  [0.2222222222222222, 0.4473684210526316, 3.083...
1  [10, 40, 5]  [2.9411764705882355, 11.196428571428571, 4.448...
2  [1, 30, 50]  [0.9166666666666666, 0.4888888888888889, 0.858...
3    [7, 8, 9]     [1.588235294117647, 6.333333333333333, 3.0625]

如果需要同时删除原始列,可以使用 DataFrame.pop:

a, b = [],[]
for n1, n2, w1, w2 in zip(df.pop('Name_prev'), df.pop('Name_now'),
                          df.pop('Weight_prev'), df.pop('Weight_now')):
    n = [val for val in n1 if val in n2]
    w3 = [w1[i] for i, val in enumerate(n1) if val in n2]
    w4 = [w2[i] for i, val in enumerate(n2) if val in n1]
    w = [i/j for i, j in zip(w3, w4)]
    a.append(n)
    b.append(w)

df = df.assign(name=a, weight=b)
print (df)
   Id         name                                             weight
0   1    [1, 3, 5]  [0.2222222222222222, 0.4473684210526316, 3.083...
1   2  [10, 40, 5]  [2.9411764705882355, 11.196428571428571, 4.448...
2   3  [1, 30, 50]  [0.9166666666666666, 0.4888888888888889, 0.858...
3   4    [7, 8, 9]     [1.588235294117647, 6.333333333333333, 3.0625]

编辑:在 pandas 中对列表的操作无法矢量化,因此最好先将列表展平,再用 merge 合并,必要时再聚合回列表:

from itertools import chain

df_prev = pd.DataFrame({
    'Name' : list(chain.from_iterable(df['Name_prev'].values.tolist())),
    'Weight_prev' : list(chain.from_iterable(df['Weight_prev'].values.tolist())),
    'Id' : df['Id'].values.repeat(df['Name_prev'].str.len())
})
print (df_prev)
    Name  Weight_prev  Id
0      1           10   1
1      3           34   1
2      4           67   1
3      5           37   1
4     10          100   2
5      3          134   2
6     40          627   2
7      5          347   2
8      1           11   3
9     30           22   3
10     4           45   3
11    50           67   3
12     1           32   4
13     7           54   4
14     8           76   4
15     9           98   4

df_now = pd.DataFrame({
    'Name' : list(chain.from_iterable(df['Name_now'].values.tolist())),
    'Weight_now' : list(chain.from_iterable(df['Weight_now'].values.tolist())),
    'Id' : df['Id'].values.repeat(df['Name_now'].str.len())
})
print (df_now)
    Name  Weight_now  Id
0      1          45   1
1      3          76   1
2      5          12   1
3     10          34   2
4     40          56   2
5      5          78   2
6      1          12   3
7     30          45   3
8     50          78   3
9      7          34   4
10     8          12   4
11     9          32   4

df = df_prev.merge(df_now, on=['Id','Name'])
df['Weight'] = df['Weight_prev'] / df['Weight_now']
print (df)
    Name  Weight_prev  Id  Weight_now     Weight
0      1           10   1          45   0.222222
1      3           34   1          76   0.447368
2      5           37   1          12   3.083333
3     10          100   2          34   2.941176
4     40          627   2          56  11.196429
5      5          347   2          78   4.448718
6      1           11   3          12   0.916667
7     30           22   3          45   0.488889
8     50           67   3          78   0.858974
9      7           54   4          34   1.588235
10     8           76   4          12   6.333333
11     9           98   4          32   3.062500

df = df.groupby('Id')['Name','Weight'].agg(list).reset_index()
print (df)
   Id         Name                                             Weight
0   1    [1, 3, 5]  [0.2222222222222222, 0.4473684210526316, 3.083...
1   2  [10, 40, 5]  [2.9411764705882355, 11.196428571428571, 4.448...
2   3  [1, 30, 50]  [0.9166666666666666, 0.4888888888888889, 0.858...
3   4    [7, 8, 9]     [1.588235294117647, 6.333333333333333, 3.0625]

注:pandas 2.0 及以上版本不再支持用 ['Name','Weight'] 这种多键写法选择分组后的列(会报错),需改写为 df.groupby('Id')[['Name','Weight']].agg(list).reset_index()。