日期相关操作

#数据集3的特征为 取 线上数据中领券和用券日期大于3月15日和小于6月30日的
feature3 = off_train[((off_train.date>='20160315')&(off_train.date<='20160630'))|((off_train.date=='null')&(off_train.date_received>='20160315')&(off_train.date_received<='20160630'))]
feature3.head()

#获取最大日期,最小日期
t2['max_date_received'] = t2.date_received.apply(lambda s:max([int(d) for d in s.split(':')]))
t2['min_date_received'] = t2.date_received.apply(lambda s:min([int(d) for d in s.split(':')]))
t2 = t2[['user_id','coupon_id','max_date_received','min_date_received']]

获取时间差

#显示时间是第几周
dataset3['day_of_week'] = dataset3['Date_received'].astype('str').apply(lambda x:date(int(x[0:4]),int(x[4:6]),int(x[6:8])).weekday()+1)
#显示时间是几月
dataset3['day_of_month'] = dataset3['Date_received'].astype('str').apply(lambda x:int(x[6:8]))
#显示时期和截止日之间的 天数 时间差
dataset3['days_distance'] = dataset3['Date_received'].astype('str').apply(lambda x:(date(int(x[0:4]),int(x[4:6]),int(x[6:8]))-date(2016,6,30)).days)



def get_day_gap_before(s):
date_received,dates = s.split('-')
dates = dates.split(':')
gaps = []
for d in dates:
#根据日期求取时间差,天数
this_gap = (date(int(date_received[0:4]),int(date_received[4:6]),int(date_received[6:8]))-date(int(d[0:4]),int(d[4:6]),int(d[6:8]))).days
if this_gap>0:
gaps.append(this_gap)
if len(gaps)==0:
return -1
else:
return min(gaps)

def get_day_gap_after(s):
date_received,dates = s.split('-')
dates = dates.split(':')
gaps = []
for d in dates:
#根据日期求取时间差,天数
this_gap = (date(int(d[0:4]),int(d[4:6]),int(d[6:8]))-date(int(date_received[0:4]),int(date_received[4:6]),int(date_received[6:8]))).days
if this_gap>0:
gaps.append(this_gap)
if len(gaps)==0:
return -1
else:
return min(gaps)


t7 = dataset3[['user_id','coupon_id','date_received']]
t7 = pd.merge(t7,t6,on=['user_id','coupon_id'],how='left')
#将两个字段拼接,中间使用短横
t7['date_received_date'] = t7.date_received.astype('str') + '-' + t7.dates
#应用函数 apply
t7['day_gap_before'] = t7.date_received_date.apply(get_day_gap_before)
t7['day_gap_after'] = t7.date_received_date.apply(get_day_gap_after)
t7 = t7[['user_id','coupon_id','date_received','day_gap_before','day_gap_after']]

python_日期相关操作_数据


python_日期相关操作_数据集_02

获取最大日期和最小日期

先拼接,再查找

#将数据分组,同组日期使用冒号相连 join
t2 = t2.groupby(['user_id','coupon_id'])['date_received'].agg(lambda x:':'.join(x)).reset_index()
t2['receive_number'] = t2.date_received.apply(lambda s:len(s.split(':')))
t2 = t2[t2.receive_number>1]
#计算最大日期和最小日期
t2['max_date_received'] = t2.date_received.apply(lambda s:max([int(d) for d in s.split(':')]))
t2['min_date_received'] = t2.date_received.apply(lambda s:min([int(d) for d in s.split(':')]))
t2 = t2[['user_id','coupon_id','max_date_received','min_date_received']]