SQL Complex Transformation
我有下面这些数据。我想要的是:任何因地址变化而更换GP的人,在每个时期都应该有一个开始日期和一个结束日期,并且结束日期应比下一个开始日期早一天。请问该如何编写这个查询?
-- Sample rows for the question: patient identifiers together with the GP (GMP),
-- the practice code and the CDS_Date of each record.
DECLARE @Tab TABLE (
    Local_Patient_Identifier VARCHAR(70),
    NHS_Number               VARCHAR(70),
    GMP                      VARCHAR(70),
    Practice_Code_GP         VARCHAR(70),
    CDS_Date                 DATE
);

-- Explicit column list so the load does not depend on the table's column order.
INSERT INTO @Tab (Local_Patient_Identifier, NHS_Number, GMP, Practice_Code_GP, CDS_Date)
VALUES
    ('A111111111', '8BFD000', 'G111111', 'N77777', '2016-05-23'),
    ('A111111111', '8BFD000', 'G222222', 'N77777', '2016-06-13'),
    ('A111111111', '8BFD000', 'G222222', 'N77777', '2016-06-13'),
    ('A111111111', '8BFD000', 'G3333333', 'ZZ44444', '2017-02-09'),
    ('A111111111', '8BFD000', 'G3333333', 'ZZ44444', '2017-03-06'),
    ('A111111111', '8BFD000', 'G3333333', 'ZZ44444', '2017-03-15'),
    ('A111111111', '8BFD000', 'G3333333', 'ZZ44444', '2017-03-29'),
    ('A111111111', '8BFD000', 'G3333333', 'ZZ44444', '2017-05-10'),
    ('A111111112', '8BFD002', 'G3333332', 'JJ44444', '2015-05-21'),
    ('A111111112', '8BFD002', 'G3333332', 'KK44445', '2016-05-02'),
    ('A111111112', '8BFD002', 'G3333332', 'WW44444', '2017-02-13');

-- Show the raw rows (same columns, same order as the table definition).
SELECT
    Local_Patient_Identifier,
    NHS_Number,
    GMP,
    Practice_Code_GP,
    CDS_Date
FROM @Tab;
您可以使用ROW_NUMBER得到这个结果:
-- Keep the earliest row of every patient/GMP/practice combination, number those
-- rows per patient in date order, then pair each row with the patient's next one
-- so its end date can be derived from the next start date.
;WITH Cte AS (
    SELECT
        a.Local_Patient_Identifier,
        a.NHS_Number,
        a.GMP,
        a.Practice_Code_GP,
        a.CDS_Date,
        -- Position of this period within the patient's history; the join below
        -- matches JoinKey to JoinKey + 1 to find the following period.
        JoinKey = ROW_NUMBER() OVER (PARTITION BY a.Local_Patient_Identifier ORDER BY a.CDS_Date)
    FROM (
        SELECT
            t.Local_Patient_Identifier,
            t.NHS_Number,
            t.GMP,
            t.Practice_Code_GP,
            t.CDS_Date,
            -- RowN = 1 marks the earliest row of each patient/GMP/practice combination.
            RowN = ROW_NUMBER() OVER (
                       PARTITION BY t.Local_Patient_Identifier, t.GMP, t.Practice_Code_GP
                       ORDER BY t.CDS_Date)
        FROM @Tab AS t  -- the sample data is loaded into the @Tab table variable; no #tab temp table exists here
    ) AS a
    WHERE a.RowN = 1
)
SELECT
    c1.Local_Patient_Identifier,
    c1.NHS_Number,
    c1.GMP,
    c1.Practice_Code_GP,
    c1.CDS_Date AS StartDate,
    -- The day before the next period starts; NULL for the patient's latest period
    -- because the LEFT JOIN finds no following row.
    DATEADD(DAY, -1, c2.CDS_Date) AS EndDate
FROM Cte AS c1
LEFT JOIN Cte AS c2
    ON c1.Local_Patient_Identifier = c2.Local_Patient_Identifier
   AND c1.JoinKey = c2.JoinKey - 1
-- Without an ORDER BY the row order of the result is not guaranteed.
ORDER BY
    c1.Local_Patient_Identifier,
    c1.CDS_Date;
输出如下:
+--------------------------+------------+----------+------------------+------------+------------+
| Local_Patient_Identifier | NHS_Number | GMP | Practice_Code_GP | StartDate | EndDate |
+--------------------------+------------+----------+------------------+------------+------------+
| A111111111 | 8BFD000 | G111111 | N77777 | 2016-05-23 | 2016-06-12 |
| A111111111 | 8BFD000 | G222222 | N77777 | 2016-06-13 | 2017-02-08 |
| A111111111 | 8BFD000 | G3333333 | ZZ44444 | 2017-02-09 | NULL |
| A111111112 | 8BFD002 | G3333332 | JJ44444 | 2015-05-21 | 2016-05-01 |
| A111111112 | 8BFD002 | G3333332 | KK44445 | 2016-05-02 | 2017-02-12 |
| A111111112 | 8BFD002 | G3333332 | WW44444 | 2017-02-13 | NULL |
+--------------------------+------------+----------+------------------+------------+------------+
如果您使用的是 SQL Server 2012 或更高版本,也可以改用 LEAD 窗口函数来获取下一行的日期。
作为一般规则,您希望避免多次引用CTE。原因是CTE中的代码每次被引用时都会执行。最好将cte结果“缓存”到#TempTable或@TableVariable,然后多次引用...... –
我不同意这个观点……这还取决于CTE所选取的数据量……原因是CTE在内部存放于内存中,并且没有统计信息……如果超出内存限制,它会默认溢出到tempdb,同样没有统计信息。如果数据量相对于分配的最大内存来说很小,我仍然更倾向于使用CTE……如果记录量很大,那么可以改用临时表 –
你尽可以不同意,但这并不会让它变得不成立。如果CTE是查询中最昂贵的部分(在本例中确实如此,因为其中有排序),那么每次引用CTE时都会产生这一成本。 –
下面是我的做法。因为懒得反复输入很长的列名,我在重建表时把列重命名了,但在输出中又给它们起了更接近您预期结果的别名。我还把日期转换成了英国日期格式(DD/MM/YYYY),以便与您的输出一致。
-- Same sample data as the question, with shortened column names:
-- LPI = Local_Patient_Identifier, NHSNum = NHS_Number, GP = Practice_Code_GP,
-- CDSDate = CDS_Date.
declare @tab table
(
    LPI     varchar(70),
    NHSNum  varchar(70),
    GMP     varchar(70),
    GP      varchar(70),
    CDSDate date
);

-- The variable is referenced with the same casing as its declaration, and the
-- column list is explicit so the load does not depend on column order.
insert into @tab (LPI, NHSNum, GMP, GP, CDSDate)
values
    ('A111111111', '8BFD000', 'G111111', 'N77777', '2016-05-23'),
    ('A111111111', '8BFD000', 'G222222', 'N77777', '2016-06-13'),
    ('A111111111', '8BFD000', 'G222222', 'N77777', '2016-06-13'),
    ('A111111111', '8BFD000', 'G3333333', 'ZZ44444', '2017-02-09'),
    ('A111111111', '8BFD000', 'G3333333', 'ZZ44444', '2017-03-06'),
    ('A111111111', '8BFD000', 'G3333333', 'ZZ44444', '2017-03-15'),
    ('A111111111', '8BFD000', 'G3333333', 'ZZ44444', '2017-03-29'),
    ('A111111111', '8BFD000', 'G3333333', 'ZZ44444', '2017-05-10'),
    ('A111111112', '8BFD002', 'G3333332', 'JJ44444', '2015-05-21'),
    ('A111111112', '8BFD002', 'G3333332', 'KK44445', '2016-05-02'),
    ('A111111112', '8BFD002', 'G3333332', 'WW44444', '2017-02-13');
-- Collapse the rows to one per LPI/NHSNum/GMP/GP combination, keeping its
-- earliest CDSDate, and number the combinations per patient in date order.
;WITH first_seen AS
(
    SELECT
        LPI,
        NHSNum,
        GMP,
        GP,
        MIN(CDSDate) AS MinDate,
        ROW_NUMBER() OVER (PARTITION BY LPI, NHSNum ORDER BY MIN(CDSDate)) AS RID
    FROM @tab
    GROUP BY LPI, NHSNum, GMP, GP
)
SELECT
    cur.LPI    AS LocalPatientIdentifier,
    cur.NHSNum AS NHSNumber,
    cur.GMP    AS GMP,
    cur.GP     AS PracticeCodeGP,
    -- Style 103 renders the date as dd/mm/yyyy text.
    CONVERT(VARCHAR(50), cur.MinDate, 103) AS StartDate,
    -- One day before the following period begins; NULL when there is no following period.
    CONVERT(VARCHAR(50), DATEADD(DAY, -1, nxt.MinDate), 103) AS EndDate
FROM first_seen AS cur
LEFT JOIN first_seen AS nxt
    ON  nxt.LPI    = cur.LPI
    AND nxt.NHSNum = cur.NHSNum
    AND nxt.RID    = cur.RID + 1
下面的写法在 SQL Server 2008 R2 上应该也能正常运行……
-- Remove any #Tab left over from an earlier run so the script can be re-executed.
IF OBJECT_ID('tempdb..#Tab', 'U') IS NOT NULL
BEGIN
    DROP TABLE #Tab;
END;

-- Working copy of the sample data.
CREATE TABLE #Tab
(
    Local_Patient_Identifier VARCHAR(70),
    NHS_Number               VARCHAR(70),
    GMP                      VARCHAR(70),
    Practice_Code_GP         VARCHAR(70),
    CDS_Date                 DATE
);

INSERT INTO #Tab
    (Local_Patient_Identifier, NHS_Number, GMP, Practice_Code_GP, CDS_Date)
VALUES
    ('A111111111', '8BFD000', 'G111111', 'N77777', '2016-05-23'),
    ('A111111111', '8BFD000', 'G222222', 'N77777', '2016-06-13'),
    ('A111111111', '8BFD000', 'G222222', 'N77777', '2016-06-13'),
    ('A111111111', '8BFD000', 'G3333333', 'ZZ44444', '2017-02-09'),
    ('A111111111', '8BFD000', 'G3333333', 'ZZ44444', '2017-03-06'),
    ('A111111111', '8BFD000', 'G3333333', 'ZZ44444', '2017-03-15'),
    ('A111111111', '8BFD000', 'G3333333', 'ZZ44444', '2017-03-29'),
    ('A111111111', '8BFD000', 'G3333333', 'ZZ44444', '2017-05-10'),
    ('A111111112', '8BFD002', 'G3333332', 'JJ44444', '2015-05-21'),
    ('A111111112', '8BFD002', 'G3333332', 'KK44445', '2016-05-02'),
    ('A111111112', '8BFD002', 'G3333332', 'WW44444', '2017-02-13');
-- SELECT * FROM #Tab t
--======================================================================
-- Remove any #ChangeData left over from an earlier run.
IF OBJECT_ID('tempdb..#ChangeData', 'U') IS NOT NULL
BEGIN
    DROP TABLE #ChangeData;
END;

-- Materialise one row per patient/GMP/practice combination (its earliest
-- CDS_Date) into #ChangeData, numbered per patient in date order, so the
-- follow-up query can read the numbered rows twice without recomputing them.
SELECT
    first_rows.Local_Patient_Identifier,
    first_rows.NHS_Number,
    first_rows.GMP,
    first_rows.Practice_Code_GP,
    first_rows.CDS_Date,
    ROW_NUMBER() OVER (
        PARTITION BY first_rows.Local_Patient_Identifier
        ORDER BY first_rows.CDS_Date
    ) AS RN
INTO #ChangeData
FROM
(
    SELECT
        t.Local_Patient_Identifier,
        t.NHS_Number,
        t.GMP,
        t.Practice_Code_GP,
        t.CDS_Date,
        -- 1 for the earliest row of each patient/GMP/practice combination.
        ROW_NUMBER() OVER (
            PARTITION BY t.Local_Patient_Identifier, t.GMP, t.Practice_Code_GP
            ORDER BY t.CDS_Date
        ) AS combo_rn
    FROM #Tab AS t
) AS first_rows
WHERE first_rows.combo_rn = 1;
-- SELECT * FROM #ChangeData cd
-- Pair every row of #ChangeData with the same patient's next row (RN + 1) to
-- turn the start dates into start/end periods.
SELECT
    cd1.Local_Patient_Identifier,
    cd1.NHS_Number,
    cd1.GMP,
    cd1.Practice_Code_GP,
    StartDate = cd1.CDS_Date,
    -- The end date is the day BEFORE the next period starts, as the question
    -- requires, so consecutive periods do not share a date. It is NULL for the
    -- patient's latest period. (The result listing that follows this query was
    -- produced without this one-day adjustment and shows the next start date itself.)
    EndDate = DATEADD(DAY, -1, cd2.CDS_Date)
FROM
    #ChangeData cd1
    LEFT JOIN #ChangeData cd2
        ON cd1.Local_Patient_Identifier = cd2.Local_Patient_Identifier
        AND cd1.RN = cd2.RN - 1
-- Without an ORDER BY the row order of the result is not guaranteed.
ORDER BY
    cd1.Local_Patient_Identifier,
    cd1.RN;
结果……
Local_Patient_Identifier NHS_Number GMP Practice_Code_GP StartDate EndDate
------------------------ ---------- -------- ---------------- ---------- ----------
A111111111 8BFD000 G111111 N77777 2016-05-23 2016-06-13
A111111111 8BFD000 G222222 N77777 2016-06-13 2017-02-09
A111111111 8BFD000 G3333333 ZZ44444 2017-02-09 NULL
A111111112 8BFD002 G3333332 JJ44444 2015-05-21 2016-05-02
A111111112 8BFD002 G3333332 KK44445 2016-05-02 2017-02-13
A111111112 8BFD002 G3333332 WW44444 2017-02-13 NULL
第4行到第8行怎么了?为什么这些行没有出现在输出中? –
@KannanKandasamy每当一个人改变GP,他们都会改变。当前的GP结束日期将为空。GP是GMP列 – JonWay
因此,如果'GMP'改变或'Practice_Code_GP'改变,就应该开始一个新的时期? – Xedni