SQL Complex Transformation

问题描述:

我有下面这些数据。我想要的是:任何因为地址变化而更换 GP 的人,在每个 GP 的登记期间都应该有一个开始日期和一个结束日期,并且结束日期应比下一个开始日期早一天。请问该如何编写这个查询?

-- Sample data for the question. Each row pairs a patient identifier and NHS
-- number with a GMP, a practice code and a CDS_Date. The second and third
-- rows are exact duplicates, kept exactly as posted.
DECLARE @Tab TABLE (
    Local_Patient_Identifier VARCHAR(70),
    NHS_Number VARCHAR(70),
    GMP VARCHAR(70),
    Practice_Code_GP VARCHAR(70),
    CDS_Date DATE
);

-- Explicit column list so the load does not depend on column order.
INSERT INTO @Tab (Local_Patient_Identifier, NHS_Number, GMP, Practice_Code_GP, CDS_Date)
VALUES
    ('A111111111', '8BFD000', 'G111111', 'N77777', '2016-05-23'),
    ('A111111111', '8BFD000', 'G222222', 'N77777', '2016-06-13'),
    ('A111111111', '8BFD000', 'G222222', 'N77777', '2016-06-13'),
    ('A111111111', '8BFD000', 'G3333333', 'ZZ44444', '2017-02-09'),
    ('A111111111', '8BFD000', 'G3333333', 'ZZ44444', '2017-03-06'),
    ('A111111111', '8BFD000', 'G3333333', 'ZZ44444', '2017-03-15'),
    ('A111111111', '8BFD000', 'G3333333', 'ZZ44444', '2017-03-29'),
    ('A111111111', '8BFD000', 'G3333333', 'ZZ44444', '2017-05-10'),
    ('A111111112', '8BFD002', 'G3333332', 'JJ44444', '2015-05-21'),
    ('A111111112', '8BFD002', 'G3333332', 'KK44445', '2016-05-02'),
    ('A111111112', '8BFD002', 'G3333332', 'WW44444', '2017-02-13');

SELECT
    Local_Patient_Identifier,
    NHS_Number,
    GMP,
    Practice_Code_GP,
    CDS_Date
FROM @Tab;

预期输出:(原帖此处为一张图片,图片内容未随文本保留)

+0

第 4 行到第 8 行是怎么处理的?为什么这些行不是输出的一部分? –

+0

@KannanKandasamy 每当一个人更换 GP 时,记录就会随之变化。当前 GP 的结束日期为空。GP 指的是 GMP 列。 – JonWay

+0

因此,如果 'GMP' 发生变化,或者 'Practice_Code_GP' 发生变化,就应该开始一个新的期间? – Xedni

您可以使用ROW_NUMBER得到这个结果:

-- Produces one row per (patient, GMP, practice code) combination with a
-- StartDate (earliest CDS_Date seen for that combination) and an EndDate
-- (the day before the patient's next combination starts, NULL for the last).
-- Reads the @Tab table variable declared in the question, so it has to run
-- in the same batch as that declaration.
-- NOTE(review): only the FIRST appearance of each combination is kept, so a
-- patient who later returns to an earlier GMP/practice gets no new period.
;WITH first_seen AS (
    -- Rank the rows of every combination by date; RowN = 1 is its first row.
    SELECT
        t.Local_Patient_Identifier,
        t.NHS_Number,
        t.GMP,
        t.Practice_Code_GP,
        t.CDS_Date,
        RowN = ROW_NUMBER() OVER (
            PARTITION BY t.Local_Patient_Identifier, t.GMP, t.Practice_Code_GP
            ORDER BY t.CDS_Date, t.NHS_Number
        )
    FROM @Tab AS t
),
cte AS (
    -- Number each patient's combinations in start-date order. GMP and
    -- practice code break date ties so JoinKey is repeatable between runs.
    SELECT
        fs.Local_Patient_Identifier,
        fs.NHS_Number,
        fs.GMP,
        fs.Practice_Code_GP,
        fs.CDS_Date,
        JoinKey = ROW_NUMBER() OVER (
            PARTITION BY fs.Local_Patient_Identifier
            ORDER BY fs.CDS_Date, fs.GMP, fs.Practice_Code_GP
        )
    FROM first_seen AS fs
    WHERE fs.RowN = 1
)
SELECT
    c1.Local_Patient_Identifier,
    c1.NHS_Number,
    c1.GMP,
    c1.Practice_Code_GP,
    c1.CDS_Date AS StartDate,
    DATEADD(DAY, -1, c2.CDS_Date) AS EndDate   -- NULL when there is no later period
FROM cte AS c1
LEFT JOIN cte AS c2
    ON c2.Local_Patient_Identifier = c1.Local_Patient_Identifier
    AND c2.JoinKey = c1.JoinKey + 1            -- c2 is the period right after c1
ORDER BY
    c1.Local_Patient_Identifier,
    c1.JoinKey;

输出如下:

+--------------------------+------------+----------+------------------+------------+------------+ 
| Local_Patient_Identifier | NHS_Number | GMP | Practice_Code_GP | StartDate | EndDate | 
+--------------------------+------------+----------+------------------+------------+------------+ 
| A111111111    | 8BFD000 | G111111 | N77777   | 2016-05-23 | 2016-06-12 | 
| A111111111    | 8BFD000 | G222222 | N77777   | 2016-06-13 | 2017-02-08 | 
| A111111111    | 8BFD000 | G3333333 | ZZ44444   | 2017-02-09 | NULL  | 
| A111111112    | 8BFD002 | G3333332 | JJ44444   | 2015-05-21 | 2016-05-01 | 
| A111111112    | 8BFD002 | G3333332 | KK44445   | 2016-05-02 | 2017-02-12 | 
| A111111112    | 8BFD002 | G3333332 | WW44444   | 2017-02-13 | NULL  | 
+--------------------------+------------+----------+------------------+------------+------------+ 

如果您使用的是 SQL Server 2012 或更高版本,也可以改用 LEAD 窗口函数。

+0

作为一般规则,您希望避免多次引用CTE。原因是CTE中的代码每次被引用时都会执行。最好将cte结果“缓存”到#TempTable或@TableVariable,然后多次引用...... –

+0

我不同意这个看法……这还取决于 CTE 所选取的数据量……原因是 CTE 在内部存放于内存中,并且没有统计信息……如果超出内存限制,它默认会溢出到 tempdb,同样没有统计信息。如果数据量相对于已分配的最大内存来说很小,我仍然更倾向于使用 CTE……如果记录量很大,那么可以改用临时表 –

+0

你尽可以不同意,但这并不会让它变得不那么真实。如果 CTE 是查询中开销最大的部分(在这个例子里确实如此,因为其中有排序),那么每引用一次 CTE 就会付出一次这样的开销。 –

以下是我的做法。因为我懒得输入很长的名字,所以在重建这些列时把它们重命名了,不过在最终输出中我给它们起了更接近您期望的别名。我还把日期转换成了英国日期格式(DD/MM/YYYY),以匹配您的输出。

-- Sample data with shortened column names:
-- LPI = Local_Patient_Identifier, NHSNum = NHS_Number,
-- GP = Practice_Code_GP, CDSDate = CDS_Date.
declare @tab table
(
    LPI varchar(70),
    NHSNum varchar(70),
    GMP varchar(70),
    GP varchar(70),
    CDSDate date
);

-- The variable is written as @tab everywhere (same spelling as the
-- declaration) so the script also runs where names are case-sensitive.
insert into @tab (LPI, NHSNum, GMP, GP, CDSDate)
values
    ('A111111111', '8BFD000', 'G111111', 'N77777', '2016-05-23'),
    ('A111111111', '8BFD000', 'G222222', 'N77777', '2016-06-13'),
    ('A111111111', '8BFD000', 'G222222', 'N77777', '2016-06-13'),
    ('A111111111', '8BFD000', 'G3333333', 'ZZ44444', '2017-02-09'),
    ('A111111111', '8BFD000', 'G3333333', 'ZZ44444', '2017-03-06'),
    ('A111111111', '8BFD000', 'G3333333', 'ZZ44444', '2017-03-15'),
    ('A111111111', '8BFD000', 'G3333333', 'ZZ44444', '2017-03-29'),
    ('A111111111', '8BFD000', 'G3333333', 'ZZ44444', '2017-05-10'),
    ('A111111112', '8BFD002', 'G3333332', 'JJ44444', '2015-05-21'),
    ('A111111112', '8BFD002', 'G3333332', 'KK44445', '2016-05-02'),
    ('A111111112', '8BFD002', 'G3333332', 'WW44444', '2017-02-13');


-- Returns one row per (LPI, NHSNum, GMP, GP) combination found in @tab, with
-- StartDate = earliest CDSDate of the combination and EndDate = the day
-- before the same patient's next combination starts (NULL for the last one).
-- Both dates are returned as text in style 103 (dd/mm/yyyy).
;with src as
(
    -- Collapse the rows to one per combination and number each patient's
    -- combinations by their earliest date. GMP and GP break ties between
    -- combinations that share an earliest date, so RID is repeatable.
    select
        RID = row_number() over (
            partition by LPI, NHSNum
            order by min(CDSDate), GMP, GP
        ),
        LPI,
        NHSNum,
        GMP,
        GP,
        MinDate = min(CDSDate)
    from @tab
    group by
        LPI,
        NHSNum,
        GMP,
        GP
)
select
    LocalPatientIdentifier = a.LPI,
    NHSNumber = a.NHSNum,
    GMP = a.GMP,
    PracticeCodeGP = a.GP,
    StartDate = convert(varchar(50), a.MinDate, 103),
    EndDate = convert(varchar(50), dateadd(day, -1, b.MinDate), 103)
from src as a
left join src as b
    on b.LPI = a.LPI
    and b.NHSNum = a.NHSNum
    and b.RID = a.RID + 1      -- b is the combination that follows a
-- Sort on the underlying values: StartDate is text and would sort by day first.
order by
    a.LPI,
    a.NHSNum,
    a.RID;

下面这个输出日期的写法在 SQL Server 2008 R2 上应该可以正常使用……

-- Rebuild the #Tab temp table from scratch: drop any copy left in this
-- session, recreate it, then load the question's sample rows.
IF OBJECT_ID('tempdb..#Tab', 'U') IS NOT NULL
BEGIN
    DROP TABLE #Tab;
END;

CREATE TABLE #Tab
(
    Local_Patient_Identifier VARCHAR(70),
    NHS_Number               VARCHAR(70),
    GMP                      VARCHAR(70),
    Practice_Code_GP         VARCHAR(70),
    CDS_Date                 DATE
);

INSERT INTO #Tab
    (Local_Patient_Identifier, NHS_Number, GMP, Practice_Code_GP, CDS_Date)
VALUES
    ('A111111111', '8BFD000', 'G111111',  'N77777',  '2016-05-23'),
    ('A111111111', '8BFD000', 'G222222',  'N77777',  '2016-06-13'),
    ('A111111111', '8BFD000', 'G222222',  'N77777',  '2016-06-13'),
    ('A111111111', '8BFD000', 'G3333333', 'ZZ44444', '2017-02-09'),
    ('A111111111', '8BFD000', 'G3333333', 'ZZ44444', '2017-03-06'),
    ('A111111111', '8BFD000', 'G3333333', 'ZZ44444', '2017-03-15'),
    ('A111111111', '8BFD000', 'G3333333', 'ZZ44444', '2017-03-29'),
    ('A111111111', '8BFD000', 'G3333333', 'ZZ44444', '2017-05-10'),
    ('A111111112', '8BFD002', 'G3333332', 'JJ44444', '2015-05-21'),
    ('A111111112', '8BFD002', 'G3333332', 'KK44445', '2016-05-02'),
    ('A111111112', '8BFD002', 'G3333332', 'WW44444', '2017-02-13');

-- SELECT * FROM #Tab t 

--====================================================================== 

-- Rebuild #ChangeData: the first #Tab row (by CDS_Date) of every
-- (patient, GMP, practice code) combination, numbered per patient in
-- CDS_Date order through the RN column.
IF OBJECT_ID('tempdb..#ChangeData', 'U') IS NOT NULL
BEGIN
    DROP TABLE #ChangeData;
END;

SELECT
    first_rows.Local_Patient_Identifier,
    first_rows.NHS_Number,
    first_rows.GMP,
    first_rows.Practice_Code_GP,
    first_rows.CDS_Date,
    ROW_NUMBER() OVER (
        PARTITION BY first_rows.Local_Patient_Identifier
        ORDER BY first_rows.CDS_Date
    ) AS RN
INTO #ChangeData
FROM
    (
        -- Position of each row inside its own combination, by date.
        SELECT
            src.Local_Patient_Identifier,
            src.NHS_Number,
            src.GMP,
            src.Practice_Code_GP,
            src.CDS_Date,
            ROW_NUMBER() OVER (
                PARTITION BY src.Local_Patient_Identifier, src.GMP, src.Practice_Code_GP
                ORDER BY src.CDS_Date
            ) AS seq_in_combo
        FROM #Tab AS src
    ) AS first_rows
WHERE
    first_rows.seq_in_combo = 1;   -- keep only the first row of each combination

-- SELECT * FROM #ChangeData cd 

-- Pair every #ChangeData row with the same patient's next row (RN + 1) to
-- turn the change dates into periods. EndDate is the day BEFORE the next
-- period starts, so consecutive periods do not share a date; it is NULL for
-- a patient's latest period because the LEFT JOIN finds no following row.
SELECT
    cd1.Local_Patient_Identifier,
    cd1.NHS_Number,
    cd1.GMP,
    cd1.Practice_Code_GP,
    StartDate = cd1.CDS_Date,
    EndDate = DATEADD(DAY, -1, cd2.CDS_Date)
FROM
    #ChangeData cd1
    LEFT JOIN #ChangeData cd2
        ON cd1.Local_Patient_Identifier = cd2.Local_Patient_Identifier
        AND cd1.RN = cd2.RN - 1
ORDER BY
    cd1.Local_Patient_Identifier,
    cd1.RN;

结果.. 。

Local_Patient_Identifier NHS_Number GMP   Practice_Code_GP StartDate EndDate 
------------------------ ---------- -------- ---------------- ---------- ---------- 
A111111111     8BFD000  G111111  N77777    2016-05-23 2016-06-12 
A111111111     8BFD000  G222222  N77777    2016-06-13 2017-02-08 
A111111111     8BFD000  G3333333 ZZ44444    2017-02-09 NULL 
A111111112     8BFD002  G3333332 JJ44444    2015-05-21 2016-05-01 
A111111112     8BFD002  G3333332 KK44445    2016-05-02 2017-02-12 
A111111112     8BFD002  G3333332 WW44444    2017-02-13 NULL