千亿数仓第四章(指标计算_用户的退货指标分析)
退货表指标统计
参考代码:
–dw层创建fact_order_refunds表
drop table if exists
itcast_dw
.fact_order_refunds
;
create tableitcast_dw
.fact_order_refunds
(
id bigint,
orderId bigint,
goodsId bigint,
refundTo bigint,
refundReson bigint,
refundOtherReson string,
backMoney double,
refundTradeNo string,
refundRemark string,
refundTime string,
shopRejectReason string,
refundStatus bigint,
createTime string
)
partitioned by (dt string)
STORED AS PARQUET;
–从ods抽取数据到dw层中的fact_order_refunds
INSERT OVERWRITE TABLE
itcast_dw
.fact_order_refunds
PARTITION (dt=‘20190908’)
SELECT
id
,orderId
,goodsId
,refundTo
,refundReson
,refundOtherReson
,backMoney
,refundTradeNo
,refundRemark
,refundTime
,shopRejectReason
,refundStatus
,createTime
FROMitcast_ods
.itcast_order_refunds
WHERE dt=‘20190908’ ;
– 退货表指标统计
drop table if exists
itcast_ads
.tmp_user_refund_measure
;
create tableitcast_ads
.tmp_user_refund_measure
(
userId string, – 用户id
returnGoodsCount bigint, – 退货商品数量
returnGoodsMoney double, – 退货商品金额
rejectGoodsCount bigint, – 拒收商品数量
rejectGoodsMoney double, – 拒收商品金额
latestRefundTime string – 最后一次退拒时间
)
partitioned by (dt string)
STORED AS PARQUET;
– 订单明细拉宽
drop table if exists
itcast_dw
.tmp_order_goods_wide
;
create tableitcast_dw
.tmp_order_goods_wide
(
orderId bigint,
orderStatus bigint,
payType bigint,
userId bigint,
userAddressId bigint,
payTime string,
payPrice double,
goodsId bigint,
goodsNum bigint,
createtime string
)
partitioned by(dt string)
STORED AS PARQUET;
– 订单明细拉宽表导入数据
insert overwrite table
itcast_dw
.tmp_order_goods_wide
partition (dt=‘20190908’)
select
t1.orderId,
t1.orderStatus,
t1.payType,
t1.userId,
t1.userAddressId,
t1.payTime,
t2.payPrice,
t2.goodsId,
t2.goodsNum,
t2.createtime
from
(select * fromitcast_ods
.itcast_orders
where dt=‘20190908’) t1
join
(select * fromitcast_ods
.itcast_order_goods
where dt=‘20190908’) t2
on
t1.orderId = t2.orderId;
– 测试
select * from
itcast_dw
.tmp_order_goods_wide
limit 10;
insert overwrite table
itcast_ads
.tmp_user_refund_measure
partition(dt=‘20190908’)
select
t1.userId,
sum(case when t1.orderStatus = 11 then t1.goodsnum else 0 end) as returnGoodsCount, – 退货商品数量
sum(case when t1.orderStatus = 11 then t2.backMoney else 0 end) as returnGoodsMoney, – 退货金额
sum(case when t1.orderStatus = -3 then t1.goodsnum else 0 end) as rejectGoodsCount, – 拒收商品数量
sum(case when t1.orderStatus = -3 then t2.backMoney else 0 end) as rejectGoodsMoney, – 拒收商品金额
MAX(t2.createtime) as latestRefundTime – 最近一次退拒时间
from
(select * fromitcast_dw
.tmp_order_goods_wide
where orderStatus =-3 or orderStatus = 11) t1 —订单状态是退和拒的订单(以订单明细为基础的表)
left join
(select * fromitcast_dw
.fact_order_refunds
where dt=‘20190908’) t2
on t1.orderId = t2.orderId and t1.goodsid= t2.goodsid --此时才能保证订单明细中数据与退货表数据是一对一join
where t2.id is not null
group by t1.userId ;
ads层开发
– 用户订单分析指标统计
drop table if exists
itcast_ads
.ads_user_order_measure
;
create tableitcast_ads
.ads_user_order_measure
(
userid string, – 用户id
username string, – 用户名称
first_paytime string, – 第一次消费时间
lastest_paytime string, – 最近一次消费时间
first_day_during_days bigint, – 首单距今时间
lastest_day_durning_days bigint, – 尾单距今时间
min_paymoney double, – 最小消费金额
max_paymoney double, – 最大消费金额
total_count_without_back bigint, – 累计消费次数(不含退拒)
total_money_without_back double, – 累计消费金额(不含退拒)
total_count_without_back_30 bigint, – 近30天累计消费次数(不含退拒)
total_money_without_back_30 double, – 近30天累计消费金额(不含退拒)
total_count_30 bigint, – 近30天累计消费次数(含退拒)
total_money_30 double, – 近30天累计消费金额(含退拒)
atv double, – 客单价(含退拒)
atv_withoutback double, – 客单价(不含退拒)
atv_60 double, – 近60天客单价(含退拒)
atv_60_withoutback double, – 近60天客单价(不含退拒)
most_usual_address string, – 常用收货地址
most_usual_paytype string, – 常用支付方式
school_order_count bigint, – 学校下单总数
company_order_count bigint, – 单位下单总数
home_order_count bigint, – 家里下单总数
am_order_count bigint, – 上午下单总数
pm_order_count bigint, – 下午下单总数
night_order_count bigint, – 晚上下单总数
returnGoodsCount bigint, – 退货商品数量
returnGoodsMoney double, – 退货商品金额
rejectGoodsCount bigint, – 拒收商品数量
rejectGoodsMoney double, – 拒收商品金额
latestRefundTime string – 最后一次退拒时间
)
partitioned by (dt string)
STORED AS PARQUET;
insert overwrite table
itcast_ads
.ads_user_order_measure
partition(dt=‘20190908’)
select
t1.userid,
t1.username,
t1.first_paytime,
t1.lastest_paytime,
t1.first_day_during_days,
t1.lastest_day_durning_days,
t1.min_paymoney,
t1.max_paymoney,
t1.total_count_without_back,
t1.total_money_without_back,
t1.total_count_without_back_30,
t1.total_money_without_back_30,
t1.total_count_30,
t1.total_money_30,
t1.atv,
t1.atv_withoutback,
t1.atv_60,
t1.atv_60_withoutback,
t1.most_usual_address,
t1.most_usual_paytype,
t1.school_order_count,
t1.company_order_count,
t1.home_order_count,
t1.am_order_count,
t1.pm_order_count,
t1.night_order_count,
t2.returnGoodsCount,
t2.returnGoodsMoney,
t2.rejectGoodsCount,
t2.rejectGoodsMoney,
t2.latestRefundTime
fromitcast_ads
.tmp_user_order_measure
t1
left joinitcast_ads
.tmp_user_refund_measure
t2
on t1.userId = t2.userId;
–验证数据
select * from itcast_ads.ads_user_order_measure where dt=“20190908” limit 5;