日期相关操作 #数据集3的特征为 取 线上数据中领券和用券日期大于3月15日和小于6月30日的 feature3 = off_train [(( off_train . date = '20160315' ) ( off_train . date = '20160630' )) | (( off_train . date == 'nu
日期相关操作
#数据集3的特征为 取 线上数据中领券和用券日期大于3月15日和小于6月30日的feature3 = off_train[((off_train.date>='20160315')&(off_train.date<='20160630'))|((off_train.date=='null')&(off_train.date_received>='20160315')&(off_train.date_received<='20160630'))]
feature3.head()
#获取最大日期,最小日期
t2['max_date_received'] = t2.date_received.apply(lambda s:max([int(d) for d in s.split(':')]))
t2['min_date_received'] = t2.date_received.apply(lambda s:min([int(d) for d in s.split(':')]))
t2 = t2[['user_id','coupon_id','max_date_received','min_date_received']]
获取时间差
#显示时间是第几周dataset3['day_of_week'] = dataset3['Date_received'].astype('str').apply(lambda x:date(int(x[0:4]),int(x[4:6]),int(x[6:8])).weekday()+1)
#显示时间是几月
dataset3['day_of_month'] = dataset3['Date_received'].astype('str').apply(lambda x:int(x[6:8]))
#显示时期和截止日之间的 天数 时间差
dataset3['days_distance'] = dataset3['Date_received'].astype('str').apply(lambda x:(date(int(x[0:4]),int(x[4:6]),int(x[6:8]))-date(2016,6,30)).days)
def get_day_gap_before(s):
date_received,dates = s.split('-')
dates = dates.split(':')
gaps = []
for d in dates:
#根据日期求取时间差,天数
this_gap = (date(int(date_received[0:4]),int(date_received[4:6]),int(date_received[6:8]))-date(int(d[0:4]),int(d[4:6]),int(d[6:8]))).days
if this_gap>0:
gaps.append(this_gap)
if len(gaps)==0:
return -1
else:
return min(gaps)
def get_day_gap_after(s):
date_received,dates = s.split('-')
dates = dates.split(':')
gaps = []
for d in dates:
#根据日期求取时间差,天数
this_gap = (date(int(d[0:4]),int(d[4:6]),int(d[6:8]))-date(int(date_received[0:4]),int(date_received[4:6]),int(date_received[6:8]))).days
if this_gap>0:
gaps.append(this_gap)
if len(gaps)==0:
return -1
else:
return min(gaps)
t7 = dataset3[['user_id','coupon_id','date_received']]
t7 = pd.merge(t7,t6,on=['user_id','coupon_id'],how='left')
#将两个字段拼接,中间使用短横
t7['date_received_date'] = t7.date_received.astype('str') + '-' + t7.dates
#应用函数 apply
t7['day_gap_before'] = t7.date_received_date.apply(get_day_gap_before)
t7['day_gap_after'] = t7.date_received_date.apply(get_day_gap_after)
t7 = t7[['user_id','coupon_id','date_received','day_gap_before','day_gap_after']]
获取最大日期和最小日期
先拼接,再查找
#将数据分组,同组日期使用冒号相连 joint2 = t2.groupby(['user_id','coupon_id'])['date_received'].agg(lambda x:':'.join(x)).reset_index()
t2['receive_number'] = t2.date_received.apply(lambda s:len(s.split(':')))
t2 = t2[t2.receive_number>1]
#计算最大日期和最小日期
t2['max_date_received'] = t2.date_received.apply(lambda s:max([int(d) for d in s.split(':')]))
t2['min_date_received'] = t2.date_received.apply(lambda s:min([int(d) for d in s.split(':')]))
t2 = t2[['user_id','coupon_id','max_date_received','min_date_received']]