阿晨1998
# A solution that does not use any explicit for loops.
import re

import numpy as np
import pandas as pd

df = pd.DataFrame(
    {
        "name": "A B C D E".split(),
        "ids": [
            "147 616 813",
            "51 616 13 813",
            "776",
            "51 671 13 813 1092",
            "13 404 492 903 1093",
        ],
    }
)

# Every i_d passed to the functions below may be an int (or a string of
# digits); it is converted with str() before being compared.


def rows(i_d) -> list:
    """Return the index labels of the rows of ``df`` whose ``ids`` contain i_d.

    The id must appear as a complete number: it may not be preceded or
    followed by another digit.  Lookarounds are used instead of consuming a
    neighbouring character so that an id at the very start or end of the
    string -- or one that is the whole string -- is still found, while
    substrings of longer ids (``16`` inside ``616``) are rejected.

    Args:
        i_d: The id to look for (int or str).

    Returns:
        A list of index labels, in ``df`` order; empty if the id is absent.
    """
    pattern = rf"(?<![0-9]){re.escape(str(i_d))}(?![0-9])"
    # na=False makes missing values count as "no match" instead of NaN.
    mask = df["ids"].str.contains(pattern, regex=True, na=False)
    return df.index[mask].tolist()


def colleagues(i_d) -> list:
    """Return the other ids that share at least one row with i_d.

    ``df`` is only read; no temporary column is added to it.

    Args:
        i_d: The id under discussion (int or str).

    Returns:
        A sorted list of id strings, without i_d itself; empty if i_d does
        not occur in any row.
    """
    i_d = str(i_d)
    # Joining the matching rows and splitting on whitespace yields every id
    # found in those rows; with no matching row this is simply empty.
    others = set(" ".join(df.loc[rows(i_d), "ids"]).split())
    others.discard(i_d)
    return sorted(others)


def third(i_d1, i_d2) -> None:
    """Print the index labels of the rows in which both ids occur.

    Args:
        i_d1: First id (int or str).
        i_d2: Second id (int or str).

    Returns:
        None; the result is reported on standard output only.
    """
    # .tolist() converts numpy scalars to plain Python values for printing.
    common_rows = np.intersect1d(rows(i_d1), rows(i_d2)).tolist()
    if common_rows:
        print('Occured together at rows ', common_rows)
    else:
        print("Didn't occur together")
Qyouu
# Suggested implementations of the three functions.
import pandas as pd

# First we create the data.
data = pd.DataFrame(
    {
        "name": ["A", "B", "C", "D", "E"],
        "ids": [
            "147 616 813",
            "51 616 13 813",
            "776",
            "51 671 13 813 1092",
            "13 404 492 903 1093",
        ],
    }
)


def func1(num, series):
    """Return the index labels of the entries of series that contain num.

    Each entry is split on whitespace and num must equal one of the
    resulting tokens, so ``16`` does not match ``616``.

    Args:
        num: The id to look for (converted with ``str``).
        series: A pandas Series of whitespace-separated id strings.

    Returns:
        A list of index labels, in series order; empty if num is absent.
    """
    wanted = str(num)
    # astype(bool) keeps the mask usable for indexing on an empty series.
    mask = series.apply(lambda ids: wanted in ids.split()).astype(bool)
    return series.index[mask].tolist()


def func2(num, series):
    """Return the other ids that share at least one entry with num.

    Args:
        num: The id under discussion (converted with ``str``).
        series: A pandas Series of whitespace-separated id strings.

    Returns:
        A sorted list of id strings, without num itself; empty if num does
        not occur in the series.
    """
    # func1 returns index *labels*, so select with .loc (not positional
    # .iloc) to stay correct for any index.
    matching = series.loc[func1(num, series)]
    others = set(" ".join(matching).split())
    # discard() tolerates an absent id, unlike remove().
    others.discard(str(num))
    return sorted(others)


def func3(num1, num2, series):
    """Return the index labels of the entries in which both ids occur.

    Args:
        num1: First id (converted with ``str``).
        num2: Second id (converted with ``str``).
        series: A pandas Series of whitespace-separated id strings.

    Returns:
        A list of index labels in series order, or the string ``'no match'``
        when the two ids never share an entry.  Identical ids are also
        reported as ``'no match'``: an id is not counted as its own
        companion.
    """
    if str(num1) == str(num2):
        return 'no match'
    num2_index = set(func1(num2, series))
    shared = [label for label in func1(num1, series) if label in num2_index]
    if not shared:
        return 'no match'
    return shared


# Then you can test them:
func1(13, data['ids'])  # [1, 3, 4]
func2(13, data['ids'])  # every other id found in rows 1, 3 and 4
func3(13, 51, data['ids'])  # [1, 3]