第33天 单表与多表操作 where筛选 group by分组 having分组后筛选 distinct去重 order by排序 limit限制展示条数 regexp正则 多表查询 子查询

如何查询表

单表操作

单表操作的基本语法:
select where group by having distinct order by limit regexp like ...

今日内容详细
前期表准备

# Employee demo table used by the single-table query examples.
create table emp(
    id int primary key auto_increment,   # primary key (implies not null + unique), same style as emp1.id
    name varchar(20) not null,
    sex enum("male","female") not null default "male",  # most employees are male, so that is the default
    age int unsigned not null default 28,   # integer display width such as int(3) is deprecated and never limited the stored range
    hire_date date not null,
    post varchar(50),
    post_comment varchar(100),
    salary decimal(15,2),    # exact fixed-point: 15 digits in total, 2 after the decimal point (double(M,D) is deprecated and inexact)
    office int,  # one office room per department
    depart_id int
    );

插入记录
三个部门：教学，销售，运营

# Seed emp with 18 employees in three departments; id is generated by
# auto_increment and post_comment is not supplied.
insert into emp
    (name, sex, age, hire_date, post, salary, office, depart_id)
values
    # teaching department
    ("jason", "male", 18, "20170301", "猥琐欲为", 1000.22, 401, 1),
    ("tom", "male", 78, "20150302", "teacher", 10000.31, 401, 1),
    ("kevin", "male", 81, "20130305", "teacher", 8300.31, 401, 1),
    ("tony", "male", 72, "20150312", "teacher", 3100, 401, 1),
    ("owen", "male", 28, "20140318", "teacher", 5000, 401, 1),
    ("jack", "female", 58, "20210323", "teacher", 1020, 401, 1),
    ("jenny", "male", 38, "20150520", "teacher", 1501, 401, 1),
    ("sank", "male", 55, "20110310", "teacher", 8222, 401, 1),
    # sales department
    ("哈哈", "male", 48, "20120321", "sale", 3333, 402, 2),
    ("呵呵", "male", 55, "20130620", "sale", 1050, 402, 2),
    ("西西", "male", 38, "20140720", "sale", 1200.31, 402, 2),
    ("嘟嘟", "male", 18, "20181201", "sale", 10011, 402, 2),
    ("萌萌", "male", 38, "20100502", "sale", 10020, 402, 2),
    ("拉拉", "male", 58, "20050325", "sale", 6250, 402, 2),
    # operations department
    ("娜娜", "male", 18, "20100319", "operation", 20000, 403, 3),
    ("宝宝", "female", 18, "20150405", "operation", 20000, 403, 3),
    ("辣辣", "male", 18, "20090315", "operation", 18220, 403, 3),
    ("蟹蟹", "male", 18, "20080320", "operation", 50000, 403, 3);

补充：
    当表字段特别多，展示的时候错乱，可以使用\G分行展示
    # \G already terminates the statement; adding ";" after it makes the mysql client report "ERROR: No query specified"
    select * from emp\G

几个重要关键字的执行顺序
书写顺序
    select id,name from emp where id > 3;
执行顺序
from 
where
select

虽然执行顺序和书写顺序不一致 你在写sql语句的时候可能不知道怎么写
你就按照书写顺序的方式写sql
    select * 先用*号占位
    之后去补全后面的sql语句
    最后将*号替换成你想要的具体字段

where筛选条件
作用：是对整体数据的一个筛选操作

    1 查询id大于等于3小于等于6的数据
    select id,name from emp where id >=3 and id <= 6;
    select id,name from emp where id between 3 and 6;   两者等价
    
    2 查询薪资是20000或者是18000或者是17000的数据
    select * from emp where salary=20000 or salary=18000 or salary=17000;
    select * from emp where salary in (20000,18000,17000);

    3 查询员工姓名中包含字母o的员工姓名和薪资
    模糊查询
        like 
            % 匹配任意多个字符
            _ 匹配任意单个字符
    select name,salary from emp where name like "%o%";  # %o%表示o的左边和右边可以是任意多个字符
    
    4 查询员工姓名是由四个字符组成的员工姓名和薪资    char_length()   _
    select name,salary from emp where name like "____";     # _代表任意单个字符
    select name,salary from emp where char_length(name)=4;

    5 查询id小于3或者id大于6的数据
    select * from emp where id <3 or id>6;
    select * from emp where id not between 3 and 6;

    6 查询薪资不在20000，18000，17000范围的数据
    select * from emp where salary not in (20000,18000,17000);
    
    7 查询岗位描述为空的员工的姓名和岗位名  针对null不能用等号 用is
    select name,post from emp where post_comment = NULL;    # does not raise an error, but matches no rows: a comparison with "= NULL" never evaluates to true
    select name,post from emp where post_comment is NULL;   # NULL has to be tested with IS NULL

like 模糊查询

	like
		% 匹配任意多个字符
        _ 匹配任意单个字符
	代码语法:
	select * from A where name like "%o%";  # %o%表示o的左边和右边可以是任意多个字符

group by分组
分组的实际应用场景非常多
男女比例
部门平均薪资
部门秃头率
国家之间数据统计

按照部门分组
    select * from emp group by post;

分组之后 最小可操作单位应该是组 而不再是组内的单个数据
    上述命令在你没有设置严格模式的时候是可正常执行的 返回的是分组之后 每个组的第一条数据
    但是这不符合分组的规范：分组之后不应该考虑单个数据 而应该以组为操作
    单位（分组之后 没办法直接获取组内单个数据）
    如果设置了严格模式 那么上述命令会直接报错
    set global sql_mode="strict_trans_tables,only_full_group_by";
    注意:set global 只对之后新建立的连接生效，设置好严格模式后需要退出客户端重新连接（不要重启服务端，否则这个全局设置会丢失）
    查看是否设置好严格模式的命令:show variables like "%mode";    
    设置严格模式后 分组默认只能拿到分组的依据
    select post from emp group by post;
    按照什么分组就只能直接拿到分组依据的字段 其他字段不能直接获取 需要借助于一些方法（聚合函数）

什么时候需要分组啊？？
关键字
每个 平均 最高 最低

聚合函数
    max     最大
    min     最小
    sum     求和
    count   查数
    avg     求平均值

    1 获取每个部门的最高薪资
    select post,max(salary) from emp group by post;
    select post as "部门",max(salary) as "最高薪资" from emp group by post;   # as"xx"可以给字段起别名，也可以省略不写
 
    2 获取每个部门的最低薪资
    select post,min(salary) from emp group by post;
   
    3 获取每个部门的平均薪资
    select post,avg(salary) from emp group by post;
   
    4 获取每个部门的薪资总和
    select post,sum(salary) from emp group by post;
   
    5 获取每个部门的人数
    select post,count(id) from emp group by post;    # count(column) counts the rows whose value in that column is not NULL, so use a column that is never NULL, such as id
    select post,count(post_comment) from emp group by post;     # runs without error, but NULL values are not counted, so a column holding only NULLs yields 0
   
    6 查询分组之后的部门名称和每个部门下所有的员工姓名  group_concat分组后其他字段值的查询
    group_concat不单单可以支持你获取分组之后的其他字段值 还支持拼接操作
    select post,group_concat(name) from emp group by post;  # 统计分组之后所有部门以及部门下的员工名字
    select post,group_concat(name,"_DSB") from emp group by post;   # 统计分组之后所有部门以及部门下的员工名字，且在名字的后面加上_DSB
    select post,group_concat(name,":",salary) from emp group by post;  # 统计分组之后所有部门以及部门下的员工名字以及工资
    concat不分组的时候用
    select concat("NAME:",name),concat("SAL:",salary)from emp;
    
    补充 as语法不单单可以给字段起别名 还可以给表起别名（临时有效）
    select emp.id,emp.name from emp;    # 取出emp表中的id和name
    select emp.id,emp.name from emp as t1;    # 报错
    select t1.id,t1.name from emp as t1;    # 正确写法
    
    7 查询每个人的年薪 12薪
    select name,salary*12 from emp;
    
分组注意事项
    关键字where和group by同时出现的时候group by必须在where的后面 （聚合函数只能在分组之后使用）
    where先对整体数据进行过滤之后再分组操作
    where筛选条件不能使用聚合函数
    select id,name,age from emp where max(salary) > 3000;   # 报错(where筛选条件不能使用聚合函数)
  
    select max(salary) from emp;  # 不分组 默认整体就是一组

    统计各部门年龄在30岁以上的员工平均薪资（操作步骤）
    1 先求所有年龄大于30岁的员工
    select * from emp where age>30;
    
    2 再对结果进行分组
    select * from emp where age>30 group by post;
    
    3 按照部门进行分组之后用聚合函数求平均薪资
    select post,avg(salary) from emp where age>30 group by post;

having分组之后的筛选条件
having的语法跟where是一致的
只不过having是再分组之后进行的过滤操作
即having是可以直接使用聚合函数的

统计各部门年龄在30岁以上的员工平均工资并且保留平均薪资大于5000的部门
select post,avg(salary) from emp 
                    where age>30        
                    group by post   
                    having avg(salary)>5000;

distinct去重

一定要注意 必须是完全一样的数据才可以去重！！！
一定不要将主键忽视了 有主键存在的情况下 是不可能去重的
[{
    
    "id":1,"name":"nana","age":18},
{
    
    "id":2,"name":"nana","age":18},
{
    
    "id":3,"name":"dada","age":18}]

select distinct id,age from emp;    # no error, but nothing is removed: distinct applies to the whole (id, age) row and id is unique
select distinct age from emp;   # deduplicates on age alone

ORM 对象关系映射：让不懂sql语句的人也能够非常牛逼的操作数据库
    表                ->  类
    一条条的数据      ->  对象
    字段对应的值      ->  对象的属性
你在写类就意味着在创建表
用类生成对象就意味着在创建数据
对象点属性就意味着在获取数据字段对应的值
目的就是减轻python程序员的压力：只需要会python面向对象的知识点就可以操作mysql

order by排序

select * from emp order by salary;
select * from emp order by salary asc;      # 同上命令的意思一样
select * from emp order by salary desc;     # 降序

order by 默认是升序（从低到高） asc 该asc可以省略不写
也可以修改为降序（从高到低） desc

select * from emp order by age desc,salary asc;
# 先按照age降序排，如果碰到age相同 则再按照salary升序排

统计各部门年龄在10岁以上的员工平均工资并且保留平均薪资大于1000的部门,然后对平均工资进行降序排序
select post,avg(salary) from emp    
            where age>10
            group by post
            having avg(salary)>1000
            order by avg(salary) desc;

执行结果如图所示：
在这里插入图片描述

limit限制展示条数

select * from emp;  # 该命令是将所有的数据都展示出来
针对数据过多的情况，我们通常都做分页处理
select * from emp limit 3;  # 只展示emp表中的3条数据

select * from emp limit 0,5;    # 展示1-5的数据
select * from emp limit 5,5;    # 展示6-10的数据
第一个参数是起始位置
第二个参数是展示条数

regexp正则

select * from emp where name regexp "^j.*(n|y)$";   # 取出名字是以j开头，以n或者是y结尾的数据

多表查询

  # 建表
    # Department lookup table for the multi-table examples.
    create table dep(
        id int primary key,     # department number; the join examples match emp1.dep_id against it, so it should be unique and not null
        name varchar(20)
        );
        
    # Employee table for the multi-table examples; dep_id holds the department number.
    create table emp1(
        id     int primary key auto_increment,
        name   varchar(20),
        sex    enum("male","female") not null default "male",
        age    int,
        dep_id int
        );

  # 插入数据
    # Explicit column list so the insert does not depend on the column order of dep.
    insert into dep(id,name) values
    (200,"技术"),
    (201,"人力资源"),
    (202,"销售"),
    (203,"运营");
    
    # Six employees; id is filled in by auto_increment.
    insert into emp1
        (name, sex, age, dep_id)
    values
        ("jason", "male", 58, 200),
        ("nana", "female", 18, 201),
        ("dada", "male", 18, 201),
        ("kevin", "male", 48, 202),
        ("owen", "male", 35, 203),
        ("jerry", "female", 28, 204);    # dep_id 204 is not among the ids inserted into dep (200-203)

拼接查询

	select * from dep,emp1;     # 结果 笛卡尔积（了解即可）
	把dep表中的每一条数据都跟emp1表中的每一条数据都进行组合，合并成一张表
	
 拼表操作
    select * from dep,emp1 where emp1.dep_id = dep.id;
    
    mysql也知道，你在后面查询数据的过程中，肯定经常会用到拼表操作，
    所以特地给你开设了对应的方法
    inner join      内连接:只拼接两张表中共有的数据部分
    left join       左连接:左表所有的数据都展示出来，没有对应的项就用null填充
    right join      右连接:右表所有的数据都展示出来，没有对应的项就用null填充
    union           全连接:MySQL没有full join关键字，用union把左连接和右连接的结果合并(并去重)来实现，左右两表所有的数据都展示出来，没有对应的项用null填充
    
    # inner join      内连接(只拼接两张表中共有的数据部分)
    select * from emp1 inner join dep on emp1.dep_id = dep.id;
    # inner join支持多表连接(例如A,B,C三张表)
    select * from A inner join B on A.B_id = B.id inner join C on B.C_id = C.id;
    
    # left join       左连接(左表所有的数据都展示出来，没有对应的项就用null填充)
    select * from emp1 left join dep on emp1.dep_id = dep.id;
    
    # right join      右连接(右表所有的数据都展示出来，没有对应的项就用null填充)
    select * from emp1 right join dep on emp1.dep_id = dep.id;

    # union           全连接(左右两表所有的数据都展示出来,没有对应的项用null填充)
    select * from emp1 left join dep on emp1.dep_id = dep.id
    union
    select * from emp1 right join dep on emp1.dep_id = dep.id;

统计emp1表与dep表中所有部门的平均年龄
# 先连接emp1表与dep两张表
	 select * from emp1 inner join dep on emp1.dep_id = dep.id;
# 再把两张表以部门进行分组求平均年龄
	# age is qualified with its table so the query stays unambiguous if dep ever gains an age column
	select dep.name,avg(emp1.age) from emp1 inner join dep on emp1.dep_id = dep.id group by dep.name;

子查询（分步查询）

   子查询就是我们平时解决问题的思路
        分步骤解决问题
            第一步
            第二步
            ...
    将一个查询语句的结果当做另一个查询语句的条件去使用
    
   查询部门是技术或者是人力资源的员工信息
        1 先获取部门的id号
        select id from dep where name="技术" or name="人力资源"; 
        
        2 再去员工表里面筛选出对应的员工
        select name from emp1 where dep_id in (200,201);
    
        把上面两个sql语句合并:
        select * from emp1 where dep_id in 
        (select id from dep where name="技术" or name="人力资源");

查询平均年龄在25岁以上的部门名称
只要是多表查询 就有两种思路 联表 子查询
# 联表操作
    1 先拿到部门和员工表 拼接之后的结果
    2 分析语义得出需要进行分组
    # Every column carries its table prefix so the query stays unambiguous if dep ever gains an age column.
    select dep.name from emp1 inner join dep
        on emp1.dep_id = dep.id
        group by dep.name
        having avg(emp1.age) > 25
        ;

    涉及到多表操作的时候 一定要加上表的前缀
    
# 子查询
    1 先按照部门进行分组,取出平均年龄大于25岁以上的员工部门id
    2 把条件1当成条件，再查询部门名称
    select name from dep where id in
    (select dep_id from emp1 group by dep_id 
    having avg(age)>25);

查询技术部门员工的信息
	select * from emp1 where dep_id = (select id from dep where name="技术");

# in、any、all 是三种不同的子查询比较写法，同一个比较条件里只使用其中一种（any、all前面必须带比较运算符，例如 = any、> all）
运行顺序:先运行子查询，得到结果后再运行外层查询
in 
查询技术部门和销售部门的员工
	select * from emp1 where dep_id in 
	(select id from dep where name = "技术" or name = "销售");
	
= any（等于任意一个，跟in的用法一样）
	select * from emp1 where dep_id = any 		# 等于任意一个，= any后面不能直接写结果集，必须跟sql语句
	(select id from dep where name = "技术" or name = "销售");	
	select * from emp1 where dep_id in (200,202);	# 同上意思一样，in后面可以直接写结果集
	select * from emp1 where dep_id = any (200,202);	# 报错，语法错误，，= any后面不能直接写结果集，必须跟sql语句

> any (大于任意一个)
查询员工表中所有员工年龄大于任意部门平均年龄的员工信息（即大于最小部门的平均年龄）
	select * from emp1 where age > any
	(select avg(age) from emp1 group by dep_id);

< any (小于任意一个)
查询员工表中所有员工年龄小于任意部门平均年龄的员工信息（即小于最大部门的平均年龄）
	select * from emp1 where age < any
	(select avg(age) from emp1 group by dep_id);

> all（大于全部）
查询员工表中所有员工年龄大于全部部门平均年龄的员工信息（即大于最大部门的平均年龄）
	select * from emp1 where age > all
	(select avg(age) from emp1 group by dep_id);
	
< all （小于全部）
查询员工表中所有员工年龄小于全部部门平均年龄的员工信息（即小于最小部门的平均年龄）
	select * from emp1 where age < all
	(select avg(age) from emp1 group by dep_id);
	
exists(类似于循环嵌套)（了解即可）
    子查询只返回布尔值 True or False
    外层表的每一行都会判断一次：返回True的时候该行保留在结果中
    返回False的时候该行被过滤掉
    
查询哪一个部门里面是没有员工的
# not exists keeps a department only when the correlated subquery finds no employee row pointing at it;
# plain exists would return the departments that DO have employees.
select * from dep
where not exists(
	select * from emp1 where emp1.dep_id = dep.id
);

exists可以这么去理解：
"""
for line in dep：
	for lines in emp1：
		where emp1.dep_id = dep.id
"""

补充知识点not in

sql语句中not in后面的结果集里不能含有null：只要含有null，not in的条件就永远不成立，查询结果为空
代码演示
	在emp1表中插入一个部门为null的数据
	insert into emp1 values(7,"xxx","male",15,null);
	在dep表中插入一个没有人的部门205
	insert into dep values(205,"adc");
	emp1表按照dep_id进行去重
	select distinct dep_id from emp1;
	查询dep表中没有人的部门
	select * from dep where id not in(select distinct dep_id from emp1);	# returns no rows once emp1 holds a row whose dep_id is NULL: with NULL in the list, "not in" never evaluates to true
	select * from dep where id not in(select distinct dep_id from emp1 where dep_id is not null);	# correct: NULL dep_id values are filtered out before the comparison

总结

    表的查询结果可以作为其他表的查询条件
    也可以通过起别名的方式把它作为一张虚拟表跟其他表关联
    如果想把虚拟表当成一张表跟其他表进行关联，必须得给虚拟表起别名才可以使用虚拟表
    
多表查询就两种方式
        1 先拼接再查询
        2 子查询一步一步来

第33天 单表与多表操作 where筛选 group by分组 having分组后筛选 distinct去重 order by排序 limit限制展示条数 regexp正则 多表查询 子查询

猜你喜欢

第33天单表与多表操作 where筛选 group by分组 having分组后筛选 distinct去重 order by排序 limit限制展示条数 regexp正则多表查询子查询