pandas 的 iterrows 迭代有性能问题吗?
import time

import numpy as np
import pandas as pd


def time_iterrows(n_rows=2000000):
    """Return the seconds spent looping over ``n_rows`` rows with ``iterrows``.

    A two-column DataFrame of random normal samples is built first; only the
    row-by-row loop itself is timed.
    """
    s1 = np.random.randn(n_rows)
    s2 = np.random.randn(n_rows)
    dfa = pd.DataFrame({'s1': s1, 's2': s2})

    start = time.time()
    i = 0
    # The loop body is deliberately trivial: the measurement is the cost of
    # iterrows() itself, not of any per-row work.
    for rowindex, row in dfa.iterrows():
        i += 1
    end = time.time()
    return end - start


def optimize(t2info, t1info):
    """Return the row of ``t2info`` whose ``number2 * t1info`` is largest.

    Parameters:
        t2info: DataFrame with a numeric ``number2`` column.
        t1info: scalar multiplier applied to every ``number2`` value.

    Returns:
        The selected row as a Series. Ties go to the earliest row.

    Raises:
        ValueError: if ``t2info`` has no rows.
    """
    calculation = t2info['number2'].values * t1info
    # argmax yields a position, not an index label, so select with iloc;
    # this stays correct even when the caller did not reset the index.
    maxrow = int(np.argmax(calculation))
    return t2info.iloc[maxrow]


def build_table3(table1, table2):
    """Pick, for each row of ``table1``, the best matching row of ``table2``.

    Rows of ``table2`` are matched on ``letter`` and ranked by ``optimize``
    using the ``number1`` value of the ``table1`` row.

    Returns:
        DataFrame with columns ``letter`` and ``number2``, indexed like
        ``table1``.
    """
    records = []
    for row_index, row in table1.iterrows():
        t2info = table2[table2.letter == row['letter']].reset_index()
        best = optimize(t2info, row['number1'])
        records.append({'letter': best['letter'], 'number2': best['number2']})
    # Building the frame once from the collected records avoids assigning
    # string values row by row into a NaN-initialised float frame.
    return pd.DataFrame(records, columns=['letter', 'number2'],
                        index=list(table1.index))


if __name__ == "__main__":
    # First snippet: time a bare iterrows() loop over two million rows.
    print(time_iterrows())

    # Second snippet: create the original tables.
    t1 = {'letter': ['a', 'b'],
          'number1': [50, -10]}
    t2 = {'letter': ['a', 'a', 'b', 'b'],
          'number2': [0.2, 0.5, 0.1, 0.4]}
    table1 = pd.DataFrame(t1)
    table2 = pd.DataFrame(t2)

    # Filter the relevant rows, optimize, and collect the results.
    table3 = build_table3(table1, table2)
繁花不似锦
Cats萌萌
相关分类