查找缺失值
drop table if exists seqtbl;
create table seqtbl(
seq int8,
name1 varchar(10)
);
insert into seqtbl values(1, '迪克');
insert into seqtbl values(2, '安');
insert into seqtbl values(3, '莱露');
insert into seqtbl values(5, '卡');
insert into seqtbl values(6, '玛丽');
insert into seqtbl values(8, '本');
-- 将表整体看做是一个集合,就可以像一下解决问题——寻找缺失的编号
-- ## 1 确定是否存在缺失
select '存在缺失编号' as gap -- 当having 为True 时会返回该值
from seqtbl
having count(*) <> max(seq);
-- ## 2 查询缺失编号的最小值
select min(a.seq + 1) as gap
from seqtbl a
where not exists (select * from seqtbl b where a.seq + 1 = b.seq); # 可以防止NULL的干扰
求众数
-- 用having句子进行子查询:求众数
drop table if exists graduates;
create table graduates(
name1 varchar(20),
income integer
);
insert into graduates values('三普森', 400000);
insert into graduates values('迈克', 30000);
insert into graduates values('怀特', 20000);
insert into graduates values('阿诺德', 20000);
insert into graduates values('史密斯', 20000);
insert into graduates values('劳伦斯', 15000);
insert into graduates values('哈德逊', 15000);
insert into graduates values('肯特', 10000);
insert into graduates values('贝克', 10000);
insert into graduates values('斯科特', 10000);
-- 求众数(1):使用谓语
select income, count(1) as cnt
from graduates
group by income
having count(1) >= all(select count(1) from graduates group by income);
-- all 会受 NULL 影响,可修正为以下
select income, count(1) as cnt
from graduates
group by income
having count(1) >= (select max(cmt) from (select count(1) cnt from graduates group by income));
求中位数
-- having子句进行自连接:求中位数
-- 使用非等值自连接
select avg(distinct income)
from (select t1.income
from graduates t1, graduates t2
group by t1.income -- 对 income 分组 分组条件having中的True
-- 求低工资部分子集 等号是为了让两个子集拥有交集
having sum(case when t2.income >= t1.income then 1 else 0 end) >= count(1) /2
-- 求高工资部分子集
and sum(case when t2.income <= t1.income then 1 else 0 end) >= count(1) / 2) tmp;
关系除法运算进行购物篮分析
-- 用关系除法运算进行购物篮分析
drop table if exists items;
create table items(
item varchar(20)
);
insert into items values('啤酒');
insert into items values('纸尿裤');
insert into items values('自行车');
drop table if exists shopitems;
create table shopitems(
shop varchar(20),
item varchar(20)
);
insert into shopitems values('仙台', '啤酒');
insert into shopitems values('仙台', '纸尿裤');
insert into shopitems values('仙台', '自行车');
insert into shopitems values('仙台', '窗帘');
insert into shopitems values('东京', '啤酒');
insert into shopitems values('东京', '纸尿裤');
insert into shopitems values('东京', '自行车');
insert into shopitems values('大阪', '电视');
insert into shopitems values('大阪', '纸尿裤');
insert into shopitems values('大阪', '自行车');
-- 查询啤酒、纸尿裤和自行车同时在库的店铺
select si.shop
from shopitems si, items i
where si.item = i.item
group by si.shop
having count(si.item) =( select count(item) from items);
-- 精确关系除法运算:使用外链接和count函数
-- 即只选择没有剩余商品的店铺
select si.shop
from shopitems si
left join items i -- left outer join 等价于 left join
on si.item = i.item
group by si.shop
having count(si.item) = (select count(item) from items)
and count(i.item) = (select count(item) from items);
练习
-- 1-4-1 修改编号缺失的检查逻辑
select
case when count(*) <> max(seq) then '存在缺失编号' else '不存在缺失编号' end as gap
from seqtbl;
-- 1-4-2 练习“特征函数”
drop table if exists students ;
create table students(
student_id int8,
dpt varchar(10),
sbmt_data date
);
insert into students values(100,'理学院',20051010);
insert into students values(101,'理学院',20050922);
insert into students values(102,'文学院',null);
insert into students values(103,'文学院',20050910);
insert into students values(200,'文学院',20050922);
insert into students values(201,'工学院',NULL);
insert into students values(202,'经济学院',20050925);
-- 全体学生都在9月提交了报告的学院
select dpt, '全体学生都在9月提交了报告的学院' as res
from students
group by dpt
having max(month(sbmt_data))<=9
and count(dpt) = count(sbmt_data);
-- 1-4-3 购物篮分析问题的一般化
select a.shop
,count(b.item) my_item_cnt
,3-count(b.item) diff_cnt
from shopitems a
left join items b
on a.item = b.item
group by a.shop
;
内容多来自 《SQL进阶教材》,仅做笔记。 练习代码均为原创。