EDITORIAL
Set theory is the foundation of the SQL language; because of this, SQL is also known as a set-oriented language.
Introduction: a few notes on set operations
Note 1: SQL can operate on multisets (bags), i.e. collections that contain duplicate rows; this is supported through the ALL option.
SQL's set operators come in two forms: one that removes duplicate rows and one that keeps them. Plain UNION and INTERSECT never return duplicate rows, whereas UNION ALL retains them. This is the opposite of the SELECT clause, where ALL is the default and DISTINCT must be requested explicitly. ALL also helps query performance, because no sort is needed once ALL is used.
Note 2: set operators have an order of precedence.
Standard SQL specifies that INTERSECT has higher priority than UNION and EXCEPT.
Note 3: DBMS vendors implement the set operations to different degrees.
MySQL historically did not support INTERSECT or EXCEPT, and Oracle uses MINUS in place of EXCEPT.
Note 4: there is no standard definition of the division operation.
Of the four basic operations, the sum (UNION), the difference (EXCEPT), and the product (CROSS JOIN) have all been introduced into standard SQL; only the quotient (division) has been slow to enter the SQL standard.
Comparing tables: checking set equality (basics)
/* Comparing two tables: checking set equality */
-- Tbl_A and Tbl_B are declared with the same four columns, keyed on keycol.
CREATE TABLE Tbl_A (
    keycol CHAR(1),
    col_1  INTEGER,
    col_2  INTEGER,
    col_3  INTEGER,
    PRIMARY KEY (keycol)
);

CREATE TABLE Tbl_B (
    keycol CHAR(1),
    col_1  INTEGER,
    col_2  INTEGER,
    col_3  INTEGER,
    PRIMARY KEY (keycol)
);
/* Case: the two tables are equal */
-- Intentionally unfiltered DELETEs: each scenario resets both tables first.
DELETE FROM Tbl_A;
DELETE FROM Tbl_B;

INSERT INTO Tbl_A (keycol, col_1, col_2, col_3) VALUES ('A', 2, 3, 4);
INSERT INTO Tbl_A (keycol, col_1, col_2, col_3) VALUES ('B', 0, 7, 9);
INSERT INTO Tbl_A (keycol, col_1, col_2, col_3) VALUES ('C', 5, 1, 6);

INSERT INTO Tbl_B (keycol, col_1, col_2, col_3) VALUES ('A', 2, 3, 4);
INSERT INTO Tbl_B (keycol, col_1, col_2, col_3) VALUES ('B', 0, 7, 9);
INSERT INTO Tbl_B (keycol, col_1, col_2, col_3) VALUES ('C', 5, 1, 6);
/* Case: row B differs (col_3 is 9 in Tbl_A but 8 in Tbl_B) */
-- Intentionally unfiltered DELETEs: each scenario resets both tables first.
DELETE FROM Tbl_A;
DELETE FROM Tbl_B;

INSERT INTO Tbl_A (keycol, col_1, col_2, col_3) VALUES ('A', 2, 3, 4);
INSERT INTO Tbl_A (keycol, col_1, col_2, col_3) VALUES ('B', 0, 7, 9);
INSERT INTO Tbl_A (keycol, col_1, col_2, col_3) VALUES ('C', 5, 1, 6);

INSERT INTO Tbl_B (keycol, col_1, col_2, col_3) VALUES ('A', 2, 3, 4);
INSERT INTO Tbl_B (keycol, col_1, col_2, col_3) VALUES ('B', 0, 7, 8);
INSERT INTO Tbl_B (keycol, col_1, col_2, col_3) VALUES ('C', 5, 1, 6);
/* Case: rows contain NULL (tables are equal) */
-- Intentionally unfiltered DELETEs: each scenario resets both tables first.
DELETE FROM Tbl_A;
DELETE FROM Tbl_B;

INSERT INTO Tbl_A (keycol, col_1, col_2, col_3) VALUES ('A', NULL, 3, 4);
INSERT INTO Tbl_A (keycol, col_1, col_2, col_3) VALUES ('B', 0, 7, 9);
INSERT INTO Tbl_A (keycol, col_1, col_2, col_3) VALUES ('C', NULL, NULL, NULL);

INSERT INTO Tbl_B (keycol, col_1, col_2, col_3) VALUES ('A', NULL, 3, 4);
INSERT INTO Tbl_B (keycol, col_1, col_2, col_3) VALUES ('B', 0, 7, 9);
INSERT INTO Tbl_B (keycol, col_1, col_2, col_3) VALUES ('C', NULL, NULL, NULL);
/* Case: rows contain NULL (row C differs: col_1 is NULL in Tbl_A but 0 in Tbl_B) */
-- Intentionally unfiltered DELETEs: each scenario resets both tables first.
DELETE FROM Tbl_A;
DELETE FROM Tbl_B;

INSERT INTO Tbl_A (keycol, col_1, col_2, col_3) VALUES ('A', NULL, 3, 4);
INSERT INTO Tbl_A (keycol, col_1, col_2, col_3) VALUES ('B', 0, 7, 9);
INSERT INTO Tbl_A (keycol, col_1, col_2, col_3) VALUES ('C', NULL, NULL, NULL);

INSERT INTO Tbl_B (keycol, col_1, col_2, col_3) VALUES ('A', NULL, 3, 4);
INSERT INTO Tbl_B (keycol, col_1, col_2, col_3) VALUES ('B', 0, 7, 9);
INSERT INTO Tbl_B (keycol, col_1, col_2, col_3) VALUES ('C', 0, NULL, NULL);
If the two tables are equal, then A UNION B = A = B; equivalently, \(A \cup B = A \cap B\).
-- Check whether the two tables are exactly equal (it is worth comparing
-- the row counts of the two tables beforehand).
-- UNION drops duplicate rows, so row_cnt is the number of distinct rows
-- found across Tbl_A and Tbl_B together.
SELECT COUNT(*) AS row_cnt
FROM (
    SELECT * FROM Tbl_A
    UNION
    SELECT * FROM Tbl_B
) AS merged;
From the query above we can see that the following equation holds for any table S: S UNION S = S. This property is called idempotency: no matter how many times the same set is added, the result stays the same.
Comparing tables: checking set equality (advanced)
In set theory, there are two general methods for determining whether two sets are equal:
- A \(\subseteq\) B and A \(\supseteq\) B \(\Leftrightarrow\) A = B
- A \(\cup\) B = A \(\cap\) B \(\Leftrightarrow\) A = B
-- A UNION B = A INTERSECT B means A = B; note that INTERSECT, like UNION,
-- is an idempotent operator.
-- Returns '相等' (equal) when the two tables hold the same rows, otherwise
-- '不相等' (not equal): the rows of (A UNION B) that are missing from
-- (A INTERSECT B) are counted, and a count of zero means equality.
-- Both operands of EXCEPT are parenthesized explicitly so the result does
-- not depend on INTERSECT binding tighter than UNION/EXCEPT; an engine that
-- evaluates set operators strictly left to right would otherwise compute
-- ((A UNION B) EXCEPT A) INTERSECT B.
SELECT CASE WHEN COUNT(*) = 0
            THEN '相等'
            ELSE '不相等'
       END AS result
FROM (
    (
        (SELECT * FROM Tbl_A)
        UNION
        (SELECT * FROM Tbl_B)
    )
    EXCEPT
    (
        (SELECT * FROM Tbl_A)
        INTERSECT
        (SELECT * FROM Tbl_B)
    )
) AS TMP;
-- Show the rows that differ between the two tables:
-- rows only in Tbl_A, followed by rows only in Tbl_B.
(
    SELECT * FROM Tbl_A
    EXCEPT
    SELECT * FROM Tbl_B
)
UNION ALL
(
    SELECT * FROM Tbl_B
    EXCEPT
    SELECT * FROM Tbl_A
);
Implementing relational division with set difference
- Nesting NOT EXISTS
- Using the HAVING clause to convert the problem into a one-to-one correspondence
- Turning the division into subtraction
-- Table definitions
/* Relational division implemented with set difference */

-- Skills lists one skill name per row.
CREATE TABLE Skills (
    skill VARCHAR(32) PRIMARY KEY
);

-- EmpSkills holds (employee, skill) pairs; each pair appears at most once.
CREATE TABLE EmpSkills (
    emp   VARCHAR(32),
    skill VARCHAR(32),
    PRIMARY KEY (emp, skill)
);
-- Skills used as the divisor of the relational division.
INSERT INTO Skills (skill) VALUES ('Oracle');
INSERT INTO Skills (skill) VALUES ('UNIX');
INSERT INTO Skills (skill) VALUES ('Java');

-- Holds every row of Skills plus one extra skill (C#).
INSERT INTO EmpSkills (emp, skill) VALUES ('相田', 'Oracle');
INSERT INTO EmpSkills (emp, skill) VALUES ('相田', 'UNIX');
INSERT INTO EmpSkills (emp, skill) VALUES ('相田', 'Java');
INSERT INTO EmpSkills (emp, skill) VALUES ('相田', 'C#');

-- Holds exactly the rows of Skills.
INSERT INTO EmpSkills (emp, skill) VALUES ('神崎', 'Oracle');
INSERT INTO EmpSkills (emp, skill) VALUES ('神崎', 'UNIX');
INSERT INTO EmpSkills (emp, skill) VALUES ('神崎', 'Java');

-- Lacks Java.
INSERT INTO EmpSkills (emp, skill) VALUES ('平井', 'UNIX');
INSERT INTO EmpSkills (emp, skill) VALUES ('平井', 'Oracle');
INSERT INTO EmpSkills (emp, skill) VALUES ('平井', 'PHP');
INSERT INTO EmpSkills (emp, skill) VALUES ('平井', 'Perl');
INSERT INTO EmpSkills (emp, skill) VALUES ('平井', 'C++');

-- Single-skill employees.
INSERT INTO EmpSkills (emp, skill) VALUES ('若田部', 'Perl');
INSERT INTO EmpSkills (emp, skill) VALUES ('渡来', 'Oracle');
-- Relational division (with remainder) via set difference:
-- an employee is kept when "all rows of Skills minus that employee's own
-- skills" is empty, i.e. the employee has every skill listed in Skills.
SELECT DISTINCT emp
FROM EmpSkills candidate
WHERE NOT EXISTS (
    SELECT skill
    FROM Skills
    EXCEPT
    SELECT skill
    FROM EmpSkills owned
    WHERE candidate.emp = owned.emp
);
Finding equal subsets
/* 4. Finding equal subsets */
-- SupParts holds (supplier, part) pairs; each pair appears at most once.
CREATE TABLE SupParts (
    sup  CHAR(32) NOT NULL,
    part CHAR(32) NOT NULL,
    PRIMARY KEY (sup, part)
);
-- Suppliers A and C receive the same three parts.
INSERT INTO SupParts (sup, part) VALUES ('A', '螺丝');
INSERT INTO SupParts (sup, part) VALUES ('A', '螺母');
INSERT INTO SupParts (sup, part) VALUES ('A', '管子');

-- Suppliers B and D receive the same two parts.
INSERT INTO SupParts (sup, part) VALUES ('B', '螺丝');
INSERT INTO SupParts (sup, part) VALUES ('B', '管子');

INSERT INTO SupParts (sup, part) VALUES ('C', '螺丝');
INSERT INTO SupParts (sup, part) VALUES ('C', '螺母');
INSERT INTO SupParts (sup, part) VALUES ('C', '管子');

INSERT INTO SupParts (sup, part) VALUES ('D', '螺丝');
INSERT INTO SupParts (sup, part) VALUES ('D', '管子');

-- Suppliers E and F each have a part set no other supplier matches.
INSERT INTO SupParts (sup, part) VALUES ('E', '保险丝');
INSERT INTO SupParts (sup, part) VALUES ('E', '螺母');
INSERT INTO SupParts (sup, part) VALUES ('E', '管子');

INSERT INTO SupParts (sup, part) VALUES ('F', '保险丝');
-- Generate every combination of two suppliers.
-- The "<" join condition keeps one ordering of each pair and excludes a
-- supplier paired with itself; DISTINCT collapses the repeats produced by
-- suppliers that have several part rows.
SELECT DISTINCT
    lhs.sup AS s1,
    rhs.sup AS s2
FROM SupParts lhs
INNER JOIN SupParts rhs
    ON lhs.sup < rhs.sup;
-- Pairs of suppliers whose part sets are identical.
-- Joining on part leaves one row per shared part for each supplier pair;
-- the pair qualifies when that shared-part count equals the total part
-- count of both the first and the second supplier.
SELECT
    lhs.sup AS s1,
    rhs.sup AS s2
FROM SupParts lhs
INNER JOIN SupParts rhs
    ON  lhs.sup < rhs.sup
    AND lhs.part = rhs.part
GROUP BY
    lhs.sup,
    rhs.sup
HAVING COUNT(*) = (SELECT COUNT(*)
                   FROM SupParts lhs_all
                   WHERE lhs_all.sup = lhs.sup)
   AND COUNT(*) = (SELECT COUNT(*)
                   FROM SupParts rhs_all
                   WHERE rhs_all.sup = rhs.sup);
Efficient SQL for deleting duplicate rows
/* 5. Efficient SQL for deleting duplicate rows */
/* In PostgreSQL, "with oids" has to be appended to the end of the CREATE TABLE statement */
-- NOTE(review): recent PostgreSQL releases reportedly no longer accept
-- WITH OIDS; confirm against the target version.
-- Products is declared without a key, so fully identical rows can be stored.
CREATE TABLE Products (
    name  CHAR(16),
    price INTEGER
);

INSERT INTO Products (name, price) VALUES ('苹果', 50);
-- The same row inserted three times: this is the duplicate to be removed.
INSERT INTO Products (name, price) VALUES ('橘子', 100);
INSERT INTO Products (name, price) VALUES ('橘子', 100);
INSERT INTO Products (name, price) VALUES ('橘子', 100);
INSERT INTO Products (name, price) VALUES ('香蕉', 80);
-- Delete duplicate rows using a correlated subquery:
-- among the rows sharing the same (name, price), every row whose rowid is
-- lower than the largest rowid of that group is removed.
-- Relies on the engine exposing a rowid pseudo-column on Products.
DELETE FROM Products
WHERE rowid < (SELECT MAX(P2.rowid)
               FROM Products P2
               WHERE Products.name = P2.name
                 AND Products.price = P2.price);
-- Efficient duplicate removal (1): obtain the complement with EXCEPT.
-- The subquery yields every rowid except the largest one of each
-- (name, price) group; exactly those rows are deleted.
DELETE FROM Products
WHERE rowid IN (SELECT rowid
                FROM Products
                EXCEPT
                SELECT MAX(rowid)
                FROM Products
                GROUP BY name, price);
-- Efficient duplicate removal (2): obtain the complement with NOT IN.
-- Keeps only the row holding the largest rowid of each (name, price) group.
DELETE FROM Products
WHERE rowid NOT IN (SELECT MAX(rowid)
                    FROM Products
                    GROUP BY name, price);
Summary
- In the area of set operations, SQL standardization has progressed very slowly, so care is needed when using them.
- If the ALL option is not specified on a set operator, duplicate rows are excluded; a sort also takes place in that case, so performance is not as good.
- UNION and INTERSECT are idempotent, whereas EXCEPT is not.
- Standard SQL has no relational division operator, so division has to be implemented by hand.
- To determine whether two sets are equal, two approaches can be used: idempotency or a one-to-one mapping.
- The complement of a set can be obtained simply by using EXCEPT.
Exercises
/* Exercise 1-7-1: improving the "UNION-only comparison" */
-- Returns '相等' (equal) when the number of distinct rows in
-- Tbl_A UNION Tbl_B matches the row count of Tbl_A and also the row count
-- of Tbl_B; otherwise returns '不相等' (not equal).
-- Table names are spelled as in their CREATE TABLE statements so the query
-- also resolves where table names are case-sensitive.
SELECT CASE WHEN COUNT(*) = (SELECT COUNT(*) FROM Tbl_A)
             AND COUNT(*) = (SELECT COUNT(*) FROM Tbl_B)
            THEN '相等'
            ELSE '不相等'
       END AS result
FROM (SELECT * FROM Tbl_A
      UNION
      SELECT * FROM Tbl_B) TMP;
/* Exercise 1-7-2: exact relational division */
-- An employee qualifies only when both set differences are empty:
--   1. Skills minus the employee's skills  (no listed skill is missing)
--   2. the employee's skills minus Skills  (no unlisted skill is held)
SELECT DISTINCT emp
FROM EmpSkills candidate
WHERE NOT EXISTS (
          SELECT skill
          FROM Skills
          EXCEPT
          SELECT skill
          FROM EmpSkills owned
          WHERE candidate.emp = owned.emp
      )
  AND NOT EXISTS (
          SELECT skill
          FROM EmpSkills extra
          WHERE candidate.emp = extra.emp
          EXCEPT
          SELECT skill
          FROM Skills
      );
/* Exercise 1-7-2: exact relational division (GROUP BY / HAVING variant) */
-- The WHERE clause keeps the rows of employees that hold every skill in
-- Skills; the HAVING clause then requires the employee's row count to equal
-- the number of rows in Skills, which rules out employees with extra skills.
SELECT emp
FROM EmpSkills candidate
WHERE NOT EXISTS (
    SELECT skill
    FROM Skills
    EXCEPT
    SELECT skill
    FROM EmpSkills owned
    WHERE candidate.emp = owned.emp
)
GROUP BY emp
HAVING COUNT(*) = (SELECT COUNT(*)
                   FROM Skills);