#(11)Frequent Values
# Exploratory look at the first column of `df`; none of these three results is stored.
# First five rows of the value counts of the first column.
df.iloc[:,0].value_counts().iloc[0:5,]
# First column with every entry that appears in `missSet` filtered out.
# NOTE(review): `missSet` is defined outside this excerpt - presumably the
# collection of missing-value markers; confirm against its definition.
df.iloc[:,0][~np.isin(df.iloc[:,0], missSet)]
# Same filter, followed by value counts sliced with [0:5,].
# NOTE(review): this slices with plain [] instead of .iloc as above - confirm
# it selects by position for this column's dtype.
df.iloc[:,0][~np.isin(df.iloc[:,0], missSet)].value_counts()[0:5,]
# Per-column accumulators filled by the loop below:
# column name -> top values, and column name -> their counts.
json_fre_name = {}
json_fre_count = {}
def fill_fre_top_5(x):
    """Return the first five entries of ``x`` as a length-5 array padded with NaN.

    Args:
        x: One-dimensional sliceable sequence, e.g. the ``.values`` or
            ``.index.values`` of a ``value_counts()`` result.

    Returns:
        A NumPy array of length 5. When the entries can be stored as floats
        the array is float64; otherwise (for example string labels) an object
        array is returned so the entries are kept unchanged. Slots beyond the
        available entries hold ``np.nan``.
    """
    # Inputs longer than five are truncated so the function always returns
    # an array instead of falling through without a result.
    top = x[:5]
    padded = np.full(5, np.nan)
    try:
        padded[:len(top)] = top
    except (TypeError, ValueError):
        # Non-numeric entries cannot be written into a float buffer; start
        # over with an object buffer so labels survive as-is.
        padded = np.full(5, np.nan, dtype=object)
        padded[:len(top)] = top
    return padded
# Columns whose most frequent values are summarised below.
fre_columns = ['ind_var1_0', 'imp_sal_var16_ult1']

# Exploratory checks; their results are not stored.
df['ind_var1_0'].value_counts()
len(df['imp_sal_var16_ult1'].value_counts())

for name in fre_columns:
    # Count each column once, ignoring entries listed in missSet, and reuse
    # the same result for both the labels and their counts.
    top_counts = df[name][~np.isin(df[name], missSet)].value_counts().iloc[:5]
    index_name = fill_fre_top_5(top_counts.index.values)
    json_fre_name[name] = index_name
    values_count = fill_fre_top_5(top_counts.values)
    json_fre_count[name] = values_count

# One row per column, five slots per row.
df_fre_name = pd.DataFrame(json_fre_name)[fre_columns].T
# The counts frame is built the same way as the names frame; the concat
# below needs it to exist.
df_fre_count = pd.DataFrame(json_fre_count)[fre_columns].T
df_fre = pd.concat([df_fre_name, df_fre_count], axis=1)

# Alternative layout: for each column a 'key' row (values) followed by a
# 'nums' row (counts), addressed through a two-level index.
good = np.array([
    json_fre_name['ind_var1_0'],
    json_fre_count['ind_var1_0'],
    json_fre_name['imp_sal_var16_ult1'],
    json_fre_count['imp_sal_var16_ult1'],
])
Mindex = pd.MultiIndex.from_product([fre_columns, ['key', 'nums']])
df_good = pd.DataFrame(good, index=Mindex)
你好,为什么我在
index_name = fill_fre_top_5(index_name)
json_fre_name[name] = index_name
这里会报错呢?
很好啊