A simple guide to the OVER keyword in SQL window functions (personal summary)

A simple guide to the OVER keyword in SQL window functions (personal summary)

Environment setup

# Optional: install the MySQL client if it is not already on this machine
brew install mysql-client
# Then add it to PATH
# NOTE(review): the /usr/local/opt prefix is hard-coded; presumably it matches the author's Homebrew install — confirm on other machines
echo 'export PATH="/usr/local/opt/mysql-client/bin:$PATH"' >> ~/.bash_profile
source ~/.bash_profile

# Deploy MySQL with Docker
# Window functions are supported starting with MySQL 8.0
# NOTE(review): the image tag `latest` is unpinned, so the server version depends on when the image is pulled
docker run -d -e MYSQL_ROOT_PASSWORD=root \
           -p 3306:3306 --name mysql \
           mysql:latest --character-set-server=utf8mb4 \
                        --collation-server=utf8mb4_unicode_ci \
                        --lower_case_table_names=1

# Connect to the database with the MySQL client (root password "root", as set by MYSQL_ROOT_PASSWORD above)
mysql -h 127.0.0.1 -u root -proot
# Note: the password must follow -p immediately, with no space in between

# Tip for clearing the screen from the mysql> prompt
system clear

Data preparation

-- Create the experiment database (if it does not exist yet) and switch to it.
CREATE DATABASE IF NOT EXISTS TestDB;
USE TestDB;

-- Rebuild the experiment table from scratch so this script can be re-run.
DROP TABLE IF EXISTS StudentScore;
CREATE TABLE StudentScore (
    ID         INT       NOT NULL AUTO_INCREMENT PRIMARY KEY,
    StudentID  INT       NOT NULL DEFAULT 0,
    ClassID    INT       NOT NULL DEFAULT 0,
    CourseID   INT       NOT NULL DEFAULT 0,
    Score      FLOAT     NOT NULL DEFAULT 0,
    CreateDate TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
);

-- Load the sample data.
-- CourseID values: 2 = Chinese, 4 = Math, 8 = English.
-- Rows are listed in insertion order; ID is assigned by AUTO_INCREMENT in that order.
INSERT INTO StudentScore (StudentID, ClassID, CourseID, Score)
VALUES
  -- Class 1: Chinese, Math, English
  (1, 1, 2, 85), (2, 1, 2, 95.5), (3, 1, 2, 90),
  (1, 1, 4, 90), (2, 1, 4, 98),   (3, 1, 4, 89),
  (1, 1, 8, 80), (2, 1, 8, 75.5), (3, 1, 8, 77),
  -- Class 2: Chinese, Math, English
  (1, 2, 2, 90), (2, 2, 2, 77), (3, 2, 2, 78), (4, 2, 2, 83),
  (1, 2, 4, 98), (2, 2, 4, 95), (3, 2, 4, 78), (4, 2, 4, 100),
  (1, 2, 8, 85), (2, 2, 8, 90), (3, 2, 8, 86), (4, 2, 8, 78.5),
  -- Class 3: Chinese, Math, English
  (1, 3, 2, 82), (2, 3, 2, 78), (3, 3, 2, 91),
  (1, 3, 4, 83), (2, 3, 4, 78), (3, 3, 4, 99),
  (1, 3, 8, 86), (2, 3, 8, 78), (3, 3, 8, 97);

Scenario analysis

Scenario 1: Query every student's score in each subject, and display the average score of that subject alongside it

Without window functions

-- Build the answer one step at a time with separate queries, then combine all
-- the steps into a single statement by nesting queries inside a query.

-- Step 1: every student's score in every subject.
SELECT
  Id,
  StudentId,
  ClassId,
  CourseId,
  Score
FROM StudentScore;

-- Step 2: the overall average for English (CourseId = 8).
-- GROUP BY can be left out when the result is a single row.
SELECT CourseId, AVG(Score)
FROM StudentScore
WHERE CourseId = 8;

-- Same result as the query above, written with GROUP BY ... HAVING.
SELECT CourseId, AVG(Score)
FROM StudentScore
GROUP BY CourseId
HAVING CourseId = 8;

-- Step 3: the overall average for each subject.
SELECT CourseId, AVG(Score)
FROM StudentScore
GROUP BY CourseId;

-- Step 4: the average for each subject within each class.
-- DECIMAL(5, 2) holds -999.99 to 999.99 with two decimal places; extra digits are rounded.
SELECT
  ClassId,
  CourseId,
  CAST(AVG(Score) AS DECIMAL(5, 2)) AS '班级该科平均分'
FROM StudentScore
GROUP BY ClassId, CourseId;

-- Combine the steps above: list every student's individual score together with
-- the class average and the overall average for the same subject.
--   cs  : one row per subject with the overall average (C_Score)
--   ccs : one row per (class, subject) with the class average (CC_Score)
-- Both derived tables are inner-joined to the base rows (ss) with explicit
-- JOIN ... ON conditions, so a forgotten join predicate cannot silently turn
-- the query into a cross product.
SELECT
  ss.Id,
  ss.StudentId,
  ss.ClassId,
  (CASE ss.CourseId  -- map the course id to its Chinese display name
    WHEN 2 THEN '语文'
    WHEN 4 THEN '数学'
    WHEN 8 THEN '英语'
    ELSE 'Unknown'
  END) AS '科目',
  ss.Score AS '分数',
  CONVERT(ccs.CC_Score, DECIMAL(5, 2)) AS '班级该科平均分',  -- CONVERT used instead of CAST; same effect
  cs.C_Score AS '全体该科平均分'
FROM StudentScore AS ss
INNER JOIN (
  SELECT CourseId, AVG(Score) AS C_Score
  FROM StudentScore
  GROUP BY CourseId
) AS cs
  ON cs.CourseId = ss.CourseId
INNER JOIN (
  SELECT ClassId, CourseId, AVG(Score) AS CC_Score
  FROM StudentScore
  GROUP BY ClassId, CourseId
) AS ccs
  ON ccs.CourseId = ss.CourseId
 AND ccs.ClassId = ss.ClassId
ORDER BY ss.Id;  -- deterministic output order

With window functions

-- Before using window functions, review the order in which MySQL processes
-- the clauses of a SQL statement.
/*
Logical processing order of a SQL statement
(1)  FROM <left_table>
(2)  ON <join_condition>
(3)  <join_type> JOIN <right_table>
(4)  WHERE <where_condition>
(5)  GROUP BY <group_by_list>
(6)  HAVING <having_condition>
(7)  SELECT 
(8)  DISTINCT <select_list>
(9)  UNION
(10) ORDER BY <order_by_condition>
(11) LIMIT <limit_number>
Window functions are evaluated after FROM, WHERE, GROUP BY and HAVING,
and before ORDER BY, LIMIT and SELECT DISTINCT.
A side note on UNION:
UNION is processed before the final ORDER BY, so the union of two individually
sorted SELECTs is not guaranteed to come out sorted. In MySQL an ORDER BY inside
one SELECT of a UNION is optimized away unless it is paired with LIMIT, and even
with LIMIT it only decides which rows that SELECT contributes, not their order
in the final result. To sort the combined result, put ORDER BY on the UNION as a whole.
*/

-- Use a window function to list every student's individual English score
-- together with the overall English average.
SELECT 
  Id, StudentId, ClassId, Score,
  AVG(Score) OVER w  -- OVER is clearly not a function; it is a keyword, much like IN
FROM
  StudentScore
WHERE
  CourseID = 8
WINDOW w AS ();  -- window definition; the empty () means no partitioning and no ordering, i.e. the window is the whole row set left after WHERE
/*
A window function consists of: 1. a window definition; 2. the function in front of the OVER keyword.
Mental model: each window definition behaves like a copy of the row set that exists just before
SELECT is evaluated (the one produced by FROM, WHERE and GROUP BY; call it V_SET).
A range of that copy is then handed to the function in front of OVER; with an empty ()
the range is the whole V_SET.
The function in front of OVER is the window function; its result is associated back with the
V_SET rows for SELECT to use.
As I understand it (a guess, not verified), the mechanism behind window functions resembles the
earlier approach of joining several subqueries together.
To understand the keyword OVER, consider this sentence: "Put a clean cloth OVER the cakes while they cool."
Here OVER clearly does not mean "finished"; it means covering something: a function is applied
over a certain range of data.
*/
-- The query above can equally be written as follows, which is also a very common form.
SELECT 
  Id, StudentId, ClassId, Score,
  AVG(Score) OVER ()  -- the window definition is written inline right after OVER, which misleads people into thinking OVER() is a function
FROM
  StudentScore
WHERE
  CourseID = 8;
/*
Basic syntax of a window definition:
1. Named windows: define one or more at the end of the statement, separated by commas.
   PARTITION BY is similar to GROUP BY: it divides the rows into groups.
WINDOW 
  name1 AS ([PARTITION BY expr [, expr] ...] [ORDER BY expr [ASC|DESC] [, expr [ASC|DESC]] ...]),
  name2 AS ([PARTITION BY expr [, expr] ...] [ORDER BY expr [ASC|DESC] [, expr [ASC|DESC]] ...])
Then reference them in SELECT; one window definition can be referenced several times:
  function1 OVER name1
  function2 OVER name2
  function3 OVER name1

2. A single anonymous window definition, written directly after OVER:
OVER ([PARTITION BY expr [, expr] ...] [ORDER BY expr [ASC|DESC] [, expr [ASC|DESC]] ...])
*/


-- Use window functions to list every student's individual score in each subject
-- together with the class average and the overall average for that subject.
SELECT
  Id,
  StudentId,
  ClassId,
  (CASE CourseId  -- map the course id to its Chinese display name
    WHEN 2 THEN '语文'
    WHEN 4 THEN '数学'
    WHEN 8 THEN '英语'
    ELSE 'Unknown'
  END) AS '科目',
  Score AS '分数',
  AVG(Score) OVER wcc AS '班级该科平均分',
  AVG(Score) OVER wc AS '全体该科平均分'
FROM StudentScore
WINDOW  -- several named windows can be defined, separated by commas
  wc AS (PARTITION BY CourseId),           -- one partition per subject
  wcc AS (PARTITION BY ClassId, CourseId)  -- one partition per (class, subject)
ORDER BY Id;  -- deterministic output order; the terminating semicolon was missing in the original

Scenario 2: Add a row number to the result set

Without window functions

-- Rank the whole grade's English scores (CourseId = 8) using a user variable.
-- No separate `SET @RN = 0;` is needed: the one-row derived table t initializes @RN.
-- Caveats:
--   * Assigning user variables inside SELECT is deprecated in MySQL 8.0, and the
--     evaluation order of expressions involving user variables is undefined, so the
--     numbering is not guaranteed to follow ORDER BY. Prefer ROW_NUMBER() on MySQL 8.0+.
--   * Id is added as a tie-breaker because the sample data contains equal scores
--     (two English rows score 86); without it their relative order is arbitrary.
SELECT
    ss.Id,
    @RN := @RN + 1 AS '序号',
    ss.StudentId,
    ss.ClassId,
    ss.CourseId,
    ss.Score
FROM StudentScore AS ss
CROSS JOIN (SELECT @RN := 0) AS t  -- the variable is initialized here
WHERE ss.CourseId = 8
ORDER BY ss.Score DESC, ss.Id;

With window functions

-- Review: GROUP BY can be used without any aggregate function, in which case it removes duplicates.
SELECT CourseId FROM StudentScore GROUP BY CourseId;  -- GROUP BY acts like DISTINCT here

-- Demonstration that window functions run after FROM, WHERE and GROUP BY (and HAVING as well).
SELECT 
  ROW_NUMBER() OVER w AS ROWNUM,  -- this line may be commented out; the window is then defined but simply not used by SELECT
  CourseId 
FROM StudentScore 
GROUP BY CourseId 
WINDOW w AS (ORDER BY CourseId DESC);  -- rows in the window are ordered by CourseId descending and then associated with the rows produced by GROUP BY

-- Rank the whole grade's English scores (CourseId = 8) with a window function.
-- Id is added as a tie-breaker: the sample data has two English rows with
-- Score 86, and ROW_NUMBER() over ORDER BY Score DESC alone would number them
-- in an arbitrary order. The final ORDER BY makes the output order explicit,
-- because the ORDER BY inside a window only controls the numbering.
SELECT
    Id,
    ROW_NUMBER() OVER w AS '序号',
    StudentId,
    ClassId,
    CourseId,
    Score
FROM StudentScore
WHERE CourseId = 8
WINDOW w AS (ORDER BY Score DESC, Id)  -- window rows ordered by Score descending, then Id; associated with the rows left after WHERE
ORDER BY Score DESC, Id;

-- Same result using an anonymous window definition, i.e. the OVER (...) form.
SELECT
    Id,
    ROW_NUMBER() OVER (ORDER BY Score DESC, Id) AS '序号',  -- although written in the select list, it is still evaluated after FROM and WHERE
    StudentId,
    ClassId,
    CourseId,
    Score
FROM StudentScore
WHERE CourseId = 8
ORDER BY Score DESC, Id;

Later words

There is not much to summarize. Reading through the examples above in full should be enough to understand the principle and the use of window functions.
I had come across the notion of an "OVER function" on the Internet, along with the statement that "a window function is executed once per row."
Being slow on the uptake, I could not make sense of those statements until I had done this experiment.

Reference article

A summary of SQL Server window functions
The execution order and flow of simple SQL statements
MySQL Window Function Descriptions
Cambridge Dictionary: explanation of OVER

Published 27 original articles · praised 4 · visits 9693

Guess you like

Origin blog.csdn.net/yoshubom/article/details/104162985