HAWQ。按进/出时间加入/退出行
我相信这是你想要达到的目的。诀窍是使用窗口函数“row_number()”。
select sub1.car_id, sub1.id_in, sub1.cross_date_time_in, sub2.id_out, sub2.cross_date_time_out
from (
select car_id, id as id_in,
cross_date_time as cross_date_time_in,
row_number() over (partition by car_id order by cross_date_time) as row_num
from source_table
where direction = 'in') as sub1
join (select car_id, id as id_out,
cross_date_time as cross_date_time_out,
row_number() over (partition by car_id order by cross_date_time) as row_num
from source_table
where direction = 'out') as sub2 on sub1.car_id = sub2.car_id and sub1.row_num = sub2.row_num;
如果您喜欢这种格式,也可以使用常见的表达式来编写此表达式。
with sub1 as (select car_id, id as id_in, cross_date_time as cross_date_time_in,
row_number() over (partition by car_id order by cross_date_time) as row_num
from source_table
where direction = 'in'),
sub2 as (select car_id, id as id_out, cross_date_time as cross_date_time_out,
row_number() over (partition by car_id order by cross_date_time) as row_num
from source_table
where direction = 'out')
select sub1.car_id, sub1.id_in, sub1.cross_date_time_in,
sub2.id_out, sub2.cross_date_time_out
from sub1
join sub2 on sub1.car_id = sub2.car_id and sub1.row_num = sub2.row_num;
感谢的 不幸的是,它没有给出正确的结果。 错误的结果。 1 1 2017-02-12 10:20:15.000000 1 2017-02-09 10:20:15.000000 – Kobra
create table source_table
(
id INT
,car_id INT
,direction text
,cross_date_time TIMESTAMP
);
insert into source_table
values (1, 1,'in', to_timestamp('2017-02-02-10:20:15', 'yyyy-MM-dd hh:mi:ss'));
insert into source_table
values (1, 1,'in', to_timestamp('2017-02-12-10:20:15', 'yyyy-MM-dd hh:mi:ss'));
insert into source_table
values (1, 1,'in', to_timestamp('2017-02-18-10:20:15', 'yyyy-MM-dd hh:mi:ss'));;
insert into source_table
values (1, 1,'in', to_timestamp('2017-02-25-10:20:15', 'yyyy-MM-dd hh:mi:ss'));
insert into source_table
values (1, 1,'out', to_timestamp('2017-02-08-10:20:15', 'yyyy-MM-dd hh:mi:ss'));
insert into source_table
values (1, 1,'out', to_timestamp('2017-02-09-10:20:15', 'yyyy-MM-dd hh:mi:ss'));
insert into source_table
values (1, 1,'out', to_timestamp('2017-02-27-10:20:15', 'yyyy-MM-dd hh:mi:ss'));
insert into source_table
values (1, 2,'in', to_timestamp('2017-02-02-10:20:15', 'yyyy-MM-dd hh:mi:ss'));
insert into source_table
values (1, 2,'in', to_timestamp('2017-02-12-10:20:15', 'yyyy-MM-dd hh:mi:ss'));
insert into source_table
values (1, 2,'in', to_timestamp('2017-02-18-10:20:15', 'yyyy-MM-dd hh:mi:ss'));
insert into source_table
values (1, 2,'out', to_timestamp('2017-02-08-10:20:15', 'yyyy-MM-dd hh:mi:ss'));
insert into source_table
values (1, 2,'out', to_timestamp('2017-02-14-10:20:15', 'yyyy-MM-dd hh:mi:ss'));
insert into source_table
values (1, 2,'out', to_timestamp('2017-02-27-10:20:15', 'yyyy-MM-dd hh:mi:ss'));
insert into source_table
values (1, 2,'out', to_timestamp('2017-02-29-10:20:15', 'yyyy-MM-dd hh:mi:ss'));
insert into source_table
values (1, 3,'in', to_timestamp('2017-02-02-10:20:15', 'yyyy-MM-dd hh:mi:ss'));
insert into source_table
values (1, 3,'in', to_timestamp('2017-02-12-10:20:15', 'yyyy-MM-dd hh:mi:ss'));
insert into source_table
values (1, 3,'out', to_timestamp('2017-02-08-10:20:15', 'yyyy-MM-dd hh:mi:ss'));
insert into source_table
values (1, 3,'out', to_timestamp('2017-02-14-10:20:15', 'yyyy-MM-dd hh:mi:ss'));
select sub1.car_id, sub1.id_in, sub1.cross_date_time_in, sub2.id_out, sub2.cross_date_time_out
from (
select car_id, id as id_in,
cross_date_time as cross_date_time_in,
row_number() over (partition by car_id order by cross_date_time) as row_num
from source_table
where direction = 'in') as sub1
join (select car_id, id as id_out,
cross_date_time as cross_date_time_out,
row_number() over (partition by car_id order by cross_date_time) as row_num
from source_table
where direction = 'out') as sub2 on sub1.car_id = sub2.car_id and sub1.row_num = sub2.row_num;
的错误结果。
1 1 2017年2月12日10:20:15.000000 1 2017年2月9日10:20:15.000000
如果使用左连接变体:
select sub1.car_id, sub1.id_in, sub1.cross_date_time_in, sub2.id_out, sub2.cross_date_time_out
from (
select car_id, id as id_in,
cross_date_time as cross_date_time_in,
row_number() over (partition by car_id order by cross_date_time) as row_num
from source_table
where direction = 'in') as sub1
left join (select car_id, id as id_out,
cross_date_time as cross_date_time_out,
row_number() over (partition by car_id order by cross_date_time) as row_num
from source_table
where direction = 'out') as sub2 on sub1.car_id = sub2.car_id and sub1.row_num = sub2.row_num;
错误结果: 1 1 2017-02-12 10:20:15.000000 1 2017-02-09 10:20:15.000000 1 1 2017-02-18 10:20:15.000000 1 2017-02-27 10:20 :15.000000 1 1 20 17-02-25 10:20:15.000000
你改变了这个问题,通过使用一个常量值为id,但更重要的是你没有描述业务逻辑。我正在猜测。它看起来像你只是订购进出行和基于匹配的匹配car_id以及它们根据日期显示的顺序。 –
不应该第二行id_in是3,而不是4?您的样本数据有4个“in”记录,用于car_id = 1,但有3个记录。你是如何确定它应该是id = 4而不是id = 3的? –
目前我正在清理数据。 'in'在2月18日和25日,'出'只有27。 所以我完全删除第18。 – Kobra