1. 衍生方式一:利用 pd.crosstab 统计每个 id 下各 cur 取值的出现次数,并据此衍生计数类特征

# Toy input table: one row per observation, pairing an `id` with a `cur`
# label. Ids repeat, and the same (id, cur) pair may occur more than once.
df = pd.DataFrame(
    [
        (2, 'cur1'),
        (2, 'cur2'),
        (2, 'cur3'),
        (3, 'cur1'),
        (3, 'cur1'),
        (5, 'cur2'),
    ],
    columns=['id', 'cur'],
)
df

python衍生特征_python

# Contingency table of occurrence counts: one row per distinct `id`,
# one column per distinct `cur` value.
df_tmp = pd.crosstab(index=df['id'], columns=df['cur'])
df_tmp

python衍生特征_编程语言_02

# Derive one row per id from the id x cur count table `df_tmp`:
#   cur1_count    - count in the 'cur1' column for that id
#   cur_count_all - sum of the counts in columns 'cur1' through 'cur3'
#   cur_cate_num  - number of those columns whose count is greater than 0
#
# Everything is computed column-wise from `df_tmp` rather than by looping
# over ids and assigning through boolean masks. Rows therefore follow the
# order of `df_tmp.index` instead of set-iteration order, and the builtin
# `id` is no longer shadowed by a loop variable.
cur_counts = df_tmp.loc[:, 'cur1':'cur3']
df_new = pd.DataFrame({
    'id': df_tmp.index.to_numpy(),
    'cur1_count': df_tmp['cur1'].to_numpy(),
    'cur_count_all': cur_counts.sum(axis=1).to_numpy(),
    'cur_cate_num': (cur_counts > 0).sum(axis=1).to_numpy(),
})
df_new.head()

python衍生特征_分享_03