从SQL Server 2005起,SQL Server开始支持窗口函数 (Window Function),以及到SQL Server 2012,窗口函数功能增强,目前为止支持以下几种窗口函数:
1. 排序函数 (Ranking Function) ;
2. 聚合函数 (Aggregate Function) ;
3. 分析函数 (Analytic Function) ;
4. NEXT VALUE FOR Function, 这是给sequence专用的一个函数;
一. 排序函数(Ranking Function)
帮助文档里的代码示例很全。
排序函数中,ROW_NUMBER()较为常用,可用于去重、分页、分组中选择数据,生成数字辅助表等等;
排序函数在语法上要求OVER子句里必须含ORDER BY,否则语法不通过,对于不想排序的场景可以这样变通;
drop table if exists test_rankingcreate table test_ranking ( id int not null, name varchar(20) not null, value int not null) insert test_ranking select 1,'name1',1 union all select 1,'name2',2 union all select 2,'name3',2 union all select 3,'name4',2select id , name, ROW_NUMBER() over (PARTITION by id ORDER BY name) as numfrom test_rankingselect id , name, ROW_NUMBER() over (PARTITION by id) as numfrom test_ranking/*Msg 4112, Level 15, State 1, Line 1 The function 'ROW_NUMBER' must have an OVER clause with ORDER BY.*/--ORDERY BY后面给一个和原表无关的派生列select id , name, ROW_NUMBER() over (PARTITION by id ORDER BY GETDATE()) as numfrom test_rankingselect id , name, ROW_NUMBER() over (PARTITION by id ORDER BY (select 0)) as numfrom test_ranking
二. 聚合函数 (Aggregate Function)
SQL Server 2005中,窗口聚合函数仅支持PARTITION BY,也就是说仅能对分组的数据整体做聚合运算;
SQL Server 2012开始,窗口聚合函数支持ORDER BY,以及ROWS/RAGNE选项,原本需要子查询来实现的需求,如: 移动平均 (moving averages), 总计聚合 (cumulative aggregates), 累计求和 (running totals) 等,变得更加方便;
代码示例1:总计/小计/累计求和
drop table if exists test_aggregate;create table test_aggregate ( event_id varchar(100), rk int, price int)insert into test_aggregatevalues('a',1,10), ('a',2,10), ('a',3,50), ('b',1,10), ('b',2,20), ('b',3,30)--1. 没有窗口函数时,用子查询select a.event_id, a.rk, --build ranking column if needed a.price, (select sum(price) from test_aggregate b where b.event_id = a.event_id and b.rk <= a.rk) as totalprice from test_aggregate a--2. 从SQL Server 2012起,用窗口函数--2.1 --没有PARTITION BY, 没有ORDER BY,为全部总计;--只有PARTITION BY, 没有ORDER BY,为分组小计;--只有ORDER BY,没有PARTITION BY,为全部累计求和(RANGE选项,见2.2)select *, sum(price) over() as TotalPrice, sum(price) over(partition by event_id) as SubTotalPrice, sum(price) over(order by rk) as RunningTotalPrice from test_aggregate a--2.2 注意ORDER BY列的选择,可能会带来不同结果select *, sum(price) over(partition by event_id order by rk) as totalprice from test_aggregate a/*event_id rk price totalprice a 1 10 10 a 2 10 20 a 3 50 70 b 1 10 10 b 2 20 30 b 3 30 60*/select *, sum(price) over(partition by event_id order by price) as totalprice from test_aggregate a/*event_id rk price totalprice a 1 10 20 a 2 10 20 a 3 50 70 b 1 10 10 b 2 20 30 b 3 30 60*/--因为ORDER BY还有个子选项ROWS/RANGE,不指定的情况下默认为RANGE UNBOUNDED PRECEDING AND CURRENT ROW --RANGE按照ORDER BY中的列值,将相同的值的行均视为当前同一行select *,sum(price) over(partition by event_id order by price) as totalprice from test_aggregate aselect *,sum(price) over(partition by event_id order by price range between unbounded preceding and current row) as totalprice from test_aggregate a--如果ORDER BY中的列值有重复值,手动改用ROWS选项即可实现累计求和select *,sum(price) over(partition by event_id order by price rows between unbounded preceding and current row) as totalprice from test_aggregate a
代码示例2:移动平均
--移动平均,举个例子,就是求前N天的平均值,和股票市场的均线类似drop table if exists test_moving_avgcreate table test_moving_avg ( ID int, Value int, DT datetime)insert into test_moving_avg values(1,10,GETDATE()-10), (2,110,GETDATE()-9), (3,100,GETDATE()-8), (4,80,GETDATE()-7), (5,60,GETDATE()-6), (6,40,GETDATE()-5), (7,30,GETDATE()-4), (8,50,GETDATE()-3), (9,20,GETDATE()-2), (10,10,GETDATE()-1)--1. 没有窗口函数时,用子查询select *, (select AVG(Value) from test_moving_avg a where a.DT >= DATEADD(DAY, -5, b.DT) AND a.DT < b.DT) AS avg_value_5daysfrom test_moving_avg b--2. 从SQL Server 2012起,用窗口函数--三个内置常量,第一行,最后一行,当前行:UNBOUNDED PRECEDING, UNBOUNDED FOLLOWING, CURRENT ROW --在行间移动,用BETWEEN m preceding AND n following (m, n > 0)SELECT *, sum(value) over (ORDER BY DT ROWS BETWEEN 5 preceding AND CURRENT ROW) moving_sum, avg(value) over (ORDER BY DT ROWS BETWEEN 4 preceding AND CURRENT ROW) moving_avg1, avg(value) over (ORDER BY DT ROWS BETWEEN 5 preceding AND 1 preceding) moving_avg2, avg(value) over (ORDER BY DT ROWS BETWEEN 3 preceding AND 1 following) moving_avg3FROM test_moving_avgORDER BY DT
三. 分析函数 (Analytic Function)
代码示例1:取当前行某列的前一个/下一个值
drop table if exists test_analyticcreate table test_analytic ( SalesYear varchar(10), Revenue int, Offset int)insert into test_analyticvalues(2013,1001,1), (2014,1002,1), (2015,1003,1), (2016,1004,1), (2017,1005,1), (2018,1006,1)--当年及去年的销售额select *,lag(Revenue,1,null) over(order by SalesYear asc) as PreviousYearRevenue from test_analyticselect *,lag(Revenue,Offset,null) over(order by SalesYear asc) as PreviousYearRevenue from test_analyticselect *,lead(Revenue,1,null) over(order by SalesYear desc) as PreviousYearRevenue from test_analytic--当年及下一年的销售额select *,lead(Revenue,1,null) over(order by SalesYear asc) as NextYearRevenue from test_analyticselect *,lead(Revenue,Offset,null) over(order by SalesYear asc) as NextYearRevenue from test_analyticselect *,lag(Revenue,1,null) over(order by SalesYear desc) as NextYearRevenue from test_analytic--可以根据offset调整跨度
代码示例2:分组中某列最大/最小值,对应的其他列值
假设有个门禁系统,在员工每次进门时写入一条记录,记录了“身份号码”,“进门时间”,“衣服颜色",查询每个员工最后一次进门时的“衣服颜色”。
drop table if exists test_first_lastcreate table test_first_last ( EmployeeID int, EnterTime datetime, ColorOfClothes varchar(20) )insert into test_first_lastvalues(1001, GETDATE()-9, 'GREEN'), (1001, GETDATE()-8, 'RED'), (1001, GETDATE()-7, 'YELLOW'), (1001, GETDATE()-6, 'BLUE'), (1002, GETDATE()-5, 'BLACK'), (1002, GETDATE()-4, 'WHITE')--1. 用子查询--LastColorOfColthesselect * from test_first_last awhere not exists(select 1 from test_first_last b where a.EmployeeID = b.EmployeeID and a.EnterTime < b.EnterTime)--LastColorOfColthesselect *from (select *, ROW_NUMBER() over(partition by EmployeeID order by EnterTime DESC) numfrom test_first_last ) twhere t.num =1--2. 用窗口函数--用LAST_VALUE时,必须加上ROWS/RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING,否则结果不正确select *, FIRST_VALUE(ColorOfClothes) OVER (PARTITION BY EmployeeID ORDER BY EnterTime DESC) as LastColorOfClothes, FIRST_VALUE(ColorOfClothes) OVER (PARTITION BY EmployeeID ORDER BY EnterTime ASC) as FirstColorOfClothes, LAST_VALUE(ColorOfClothes) OVER (PARTITION BY EmployeeID ORDER BY EnterTime ASC ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) as LastColorOfClothes, LAST_VALUE(ColorOfClothes) OVER (PARTITION BY EmployeeID ORDER BY EnterTime DESC ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) as FirstColorOfClothesfrom test_first_last--对于显示表中所有行,并追加Last/First字段时用窗口函数方便些--对于挑选表中某一行/多行时,用子查询更方便
四. NEXT VALUE FOR Function
drop sequence if exists test_seqcreate sequence test_seq start with 1increment by 1;GOdrop table if exists test_next_valuecreate table test_next_value ( ID int, Name varchar(10) )insert into test_next_value(Name)values('AAA'), ('AAA'), ('BBB'), ('CCC')--对于多行数据获取sequence的next value,是否使用窗口函数都会逐行计数--窗口函数中ORDER BY用于控制不同列值的计数顺序select *, NEXT VALUE FOR test_seq from test_next_valueselect *, NEXT VALUE FOR test_seq OVER(ORDER BY Name DESC) from test_next_value
参考:
SELECT - OVER Clause (Transact-SQL)
SQL Server Windowing Functions: ROWS vs. RANGE
https://www.sqlpassion.at/archive/2015/01/22/sql-server-windowing-functions-rows-vs-range/