千亿数仓第四章(指标计算_用户的退货指标分析)

退货表指标统计
千亿数仓第四章(指标计算_用户的退货指标分析)
参考代码:
–dw层创建fact_order_refunds表

drop table if exists itcast_dw.fact_order_refunds;
create table itcast_dw.fact_order_refunds(
id bigint,
orderId bigint,
goodsId bigint,
refundTo bigint,
refundReson bigint,
refundOtherReson string,
backMoney double,
refundTradeNo string,
refundRemark string,
refundTime string,
shopRejectReason string,
refundStatus bigint,
createTime string
)
partitioned by (dt string)
STORED AS PARQUET;

–从ods抽取数据到dw层中的fact_order_refunds

INSERT OVERWRITE TABLE itcast_dw.fact_order_refunds PARTITION (dt=‘20190908’)
SELECT
id
,orderId
,goodsId
,refundTo
,refundReson
,refundOtherReson
,backMoney
,refundTradeNo
,refundRemark
,refundTime
,shopRejectReason
,refundStatus
,createTime
FROM itcast_ods.itcast_order_refunds WHERE dt=‘20190908’ ;

– 退货表指标统计

drop table if exists itcast_ads.tmp_user_refund_measure;
create table itcast_ads.tmp_user_refund_measure(
userId string, – 用户id
returnGoodsCount bigint, – 退货商品数量
returnGoodsMoney double, – 退货商品金额
rejectGoodsCount bigint, – 拒收商品数量
rejectGoodsMoney double, – 拒收商品金额
latestRefundTime string – 最后一次退拒时间
)
partitioned by (dt string)
STORED AS PARQUET;

– 订单明细拉宽

drop table if exists itcast_dw.tmp_order_goods_wide;
create table itcast_dw.tmp_order_goods_wide(
orderId bigint,
orderStatus bigint,
payType bigint,
userId bigint,
userAddressId bigint,
payTime string,
payPrice double,
goodsId bigint,
goodsNum bigint,
createtime string
)
partitioned by(dt string)
STORED AS PARQUET;

– 订单明细拉宽表导入数据

insert overwrite table itcast_dw.tmp_order_goods_wide partition (dt=‘20190908’)
select
t1.orderId,
t1.orderStatus,
t1.payType,
t1.userId,
t1.userAddressId,
t1.payTime,
t2.payPrice,
t2.goodsId,
t2.goodsNum,
t2.createtime
from
(select * from itcast_ods.itcast_orders where dt=‘20190908’) t1
join
(select * from itcast_ods.itcast_order_goods where dt=‘20190908’) t2
on
t1.orderId = t2.orderId;

– 测试

select * from itcast_dw.tmp_order_goods_wide limit 10;

insert overwrite table itcast_ads.tmp_user_refund_measure partition(dt=‘20190908’)
select
t1.userId,
sum(case when t1.orderStatus = 11 then t1.goodsnum else 0 end) as returnGoodsCount, – 退货商品数量
sum(case when t1.orderStatus = 11 then t2.backMoney else 0 end) as returnGoodsMoney, – 退货金额
sum(case when t1.orderStatus = -3 then t1.goodsnum else 0 end) as rejectGoodsCount, – 拒收商品数量
sum(case when t1.orderStatus = -3 then t2.backMoney else 0 end) as rejectGoodsMoney, – 拒收商品金额
MAX(t2.createtime) as latestRefundTime – 最近一次退拒时间
from
(select * from itcast_dw.tmp_order_goods_wide where orderStatus =-3 or orderStatus = 11) t1 —订单状态是退和拒的订单(以订单明细为基础的表)
left join
(select * from itcast_dw.fact_order_refunds where dt=‘20190908’) t2
on t1.orderId = t2.orderId and t1.goodsid= t2.goodsid --此时才能保证订单明细中数据与退货表数据是一对一join
where t2.id is not null
group by t1.userId ;

ads层开发
– 用户订单分析指标统计

drop table if exists itcast_ads.ads_user_order_measure;
create table itcast_ads.ads_user_order_measure(
userid string, – 用户id
username string, – 用户名称
first_paytime string, – 第一次消费时间
lastest_paytime string, – 最近一次消费时间
first_day_during_days bigint, – 首单距今时间
lastest_day_durning_days bigint, – 尾单距今时间
min_paymoney double, – 最小消费金额
max_paymoney double, – 最大消费金额
total_count_without_back bigint, – 累计消费次数(不含退拒)
total_money_without_back double, – 累计消费金额(不含退拒)
total_count_without_back_30 bigint, – 近30天累计消费次数(不含退拒)
total_money_without_back_30 double, – 近30天累计消费金额(不含退拒)
total_count_30 bigint, – 近30天累计消费次数(含退拒)
total_money_30 double, – 近30天累计消费金额(含退拒)
atv double, – 客单价(含退拒)
atv_withoutback double, – 客单价(不含退拒)
atv_60 double, – 近60天客单价(含退拒)
atv_60_withoutback double, – 近60天客单价(不含退拒)
most_usual_address string, – 常用收货地址
most_usual_paytype string, – 常用支付方式
school_order_count bigint, – 学校下单总数
company_order_count bigint, – 单位下单总数
home_order_count bigint, – 家里下单总数
am_order_count bigint, – 上午下单总数
pm_order_count bigint, – 下午下单总数
night_order_count bigint, – 晚上下单总数
returnGoodsCount bigint, – 退货商品数量
returnGoodsMoney double, – 退货商品金额
rejectGoodsCount bigint, – 拒收商品数量
rejectGoodsMoney double, – 拒收商品金额
latestRefundTime string – 最后一次退拒时间
)
partitioned by (dt string)
STORED AS PARQUET;

insert overwrite table itcast_ads.ads_user_order_measure partition(dt=‘20190908’)
select
t1.userid,
t1.username,
t1.first_paytime,
t1.lastest_paytime,
t1.first_day_during_days,
t1.lastest_day_durning_days,
t1.min_paymoney,
t1.max_paymoney,
t1.total_count_without_back,
t1.total_money_without_back,
t1.total_count_without_back_30,
t1.total_money_without_back_30,
t1.total_count_30,
t1.total_money_30,
t1.atv,
t1.atv_withoutback,
t1.atv_60,
t1.atv_60_withoutback,
t1.most_usual_address,
t1.most_usual_paytype,
t1.school_order_count,
t1.company_order_count,
t1.home_order_count,
t1.am_order_count,
t1.pm_order_count,
t1.night_order_count,
t2.returnGoodsCount,
t2.returnGoodsMoney,
t2.rejectGoodsCount,
t2.rejectGoodsMoney,
t2.latestRefundTime
from
itcast_ads.tmp_user_order_measure t1
left join itcast_ads.tmp_user_refund_measure t2
on t1.userId = t2.userId;

–验证数据

select * from itcast_ads.ads_user_order_measure where dt=“20190908” limit 5;