Hive 学习(五) Hive基础之HSQL

一,前言

二,连接查询

  2.1 内连接(join | inner join)

  2.2 左外连接(left join | left outer join

  2.3 右外连接(right join | right outer join)

  2.4 全外连接(full join | full outer join)

  2.5 左半连接(left semi join)

三,分组聚合

  3.1 分组聚合须知

  3.2 分组聚合练习1

  3.3 分组聚合练习2

扫描二维码关注公众号,回复: 6261940 查看本文章

 

正文

一,前言

  和其他SQL一样,HSQL与其他的SQL差别不大,在这就进行一些简单的介绍。

二,连接查询

  数据准备和表创建如下:

create table t_a(name string,numb int)
row format delimited
fields terminated by ',';

create table t_b(name string,nick string)
row format delimited
fields terminated by ',';

load data local inpath '/root/hivetest/a.txt' into table t_a;
load data local inpath '/root/hivetest/b.txt' into table t_b;

0: jdbc:hive2://localhost:10000> select * from t_a;
+-----------+-----------+--+
| t_a.name  | t_a.numb  |
+-----------+-----------+--+
| a         | 4         |
| b         | 5         |
| c         | 6         |
| d         | 8         |
+-----------+-----------+--+
4 rows selected (0.197 seconds)
0: jdbc:hive2://localhost:10000> select * from t_b;
+-----------+-----------+--+
| t_b.name  | t_b.nick  |
+-----------+-----------+--+
| a         | laowang   |
| b         | qiangzi   |
| d         | paoge     |
| g         | haizi     |
+-----------+-----------+--+
4 rows selected (0.135 seconds)

  2.1 内连接(join | inner join)

-- 各类join
--1/ 内连接
-- 笛卡尔积
select a.*,b.*
from t_a a inner join t_b b;  // inner join == join

+-----------+-----------+-----------+-----------+--+
| t_a.name  | t_a.numb  | t_b.name  | t_b.nick  |
+-----------+-----------+-----------+-----------+--+
| a         | 4         | a         | laowang   |
| b         | 5         | a         | laowang   |
| c         | 6         | a         | laowang   |
| d         | 8         | a         | laowang   |
| a         | 4         | b         | qiangzi   |
| b         | 5         | b         | qiangzi   |
| c         | 6         | b         | qiangzi   |
| d         | 8         | b         | qiangzi   |
| a         | 4         | d         | paoge     |
| b         | 5         | d         | paoge     |
| c         | 6         | d         | paoge     |
| d         | 8         | d         | paoge     |
| a         | 4         | g         | haizi     |
| b         | 5         | g         | haizi     |
| c         | 6         | g         | haizi     |
| d         | 8         | g         | haizi     |
+-----------+-----------+-----------+-----------+--+
16 rows selected (14.104 seconds)


-- 指定join条件
select a.*,b.*
from 
t_a a join t_b b on a.name=b.name;

+---------+---------+---------+----------+--+
| a.name  | a.numb  | b.name  |  b.nick  |
+---------+---------+---------+----------+--+
| a       | 4       | a       | laowang  |
| b       | 5       | b       | qiangzi  |
| d       | 8       | d       | paoge    |
+---------+---------+---------+----------+--+
3 rows selected (13.429 seconds)

  2.2 左外连接(left join | left outer join)

-- 2/ 左外连接(左连接)
select a.*,b.*
from 
t_a a left outer join t_b b on a.name=b.name;

+---------+---------+---------+----------+--+
| a.name  | a.numb  | b.name  |  b.nick  |
+---------+---------+---------+----------+--+
| a       | 4       | a       | laowang  |
| b       | 5       | b       | qiangzi  |
| c       | 6       | NULL    | NULL     |
| d       | 8       | d       | paoge    |
+---------+---------+---------+----------+--+
4 rows selected (9.25 seconds)

  2.3 右外连接(right join | right outer join)

-- 3/ 右外连接(右连接)
select a.*,b.*
from 
t_a a right outer join t_b b on a.name=b.name;
+---------+---------+---------+----------+--+
| a.name  | a.numb  | b.name  |  b.nick  |
+---------+---------+---------+----------+--+
| a       | 4       | a       | laowang  |
| b       | 5       | b       | qiangzi  |
| d       | 8       | d       | paoge    |
| NULL    | NULL    | g       | haizi    |
+---------+---------+---------+----------+--+
4 rows selected (8.914 seconds)

  2.4 全外连接(full join | full outer join)

-- 4/ 全外连接
select a.*,b.*
from
t_a a full outer join t_b b on a.name=b.name;

+---------+---------+---------+----------+--+
| a.name  | a.numb  | b.name  |  b.nick  |
+---------+---------+---------+----------+--+
| a       | 4       | a       | laowang  |
| b       | 5       | b       | qiangzi  |
| c       | 6       | NULL    | NULL     |
| d       | 8       | d       | paoge    |
| NULL    | NULL    | g       | haizi    |
+---------+---------+---------+----------+--+
5 rows selected (1.74 seconds)

  2.5 左半连接(left semi join)

  注意:left semi是HSQL特有,且没有右半连接

-- 5/ 左半连接 :注意没有右半连接
select a.*
from 
t_a a left semi join t_b b on a.name=b.name;

+---------+---------+--+
| a.name  | a.numb  |
+---------+---------+--+
| a       | 4       |
| b       | 5       |
| d       | 8       |
+---------+---------+--+
3 rows selected (12.236 seconds)

三,分组聚合

  数据下载:access.log

  表创建和数据插入:

create table t_access(ip string,url string,access_time string)
partitioned by (dt string)
row format delimited fields terminated by ',';


-- 导入数据
load data local inpath '/root/hivetest/access.log.0804' into table t_access partition(dt='2017-08-04');
load data local inpath '/root/hivetest/access.log.0805' into table t_access partition(dt='2017-08-05');
load data local inpath '/root/hivetest/access.log.0806' into table t_access partition(dt='2017-08-06');

-- 查看表的分区
show partitions t_access;

  3.1 分组聚合须知

   一旦有group by子句,那么,在select子句中就不能有 (分组字段,聚合函数) 以外的字段

  3.2 分组聚合练习1

-- 针对每一行进行运算
select ip,upper(url),access_time  -- 该表达式是对数据中的每一行进行逐行运算
from t_access;



-- 求每条URL的访问总次数

select url,count(1) as cnts   -- 该表达式是对分好组的数据进行逐组运算
from t_access 
group by url;

+---------------------------------+------+--+
|               url               | _c1  |
+---------------------------------+------+--+
| http://www.edu360.cn/excersize  | 3    |
| http://www.edu360.cn/job        | 7    |
| http://www.edu360.cn/pay        | 1    |
| http://www.edu360.cn/register   | 2    |
| http://www.edu360.cn/stu        | 4    |
| http://www.edu360.cn/teach      | 2    |
+---------------------------------+------+--+
6 rows selected (1.683 seconds)


-- 求每个URL的访问者中ip地址最大的

select url,max(ip)
from t_pv_log
group by url;


+---------------------------------+----------------+--+
|               url               |      _c1       |
+---------------------------------+----------------+--+
| http://www.edu360.cn/excersize  | 192.168.33.46  |
| http://www.edu360.cn/job        | 192.168.33.55  |
| http://www.edu360.cn/pay        | 192.168.34.44  |
| http://www.edu360.cn/register   | 192.168.133.3  |
| http://www.edu360.cn/stu        | 192.168.33.44  |
| http://www.edu360.cn/teach      | 192.168.44.3   |
+---------------------------------+----------------+--+
6 rows selected (1.595 seconds)


-- 求每个用户访问同一个页面的所有记录中,时间最晚的一条

select ip,url,max(access_time) 
from  t_pv_log
group by ip,url;

+----------------+---------------------------------+----------------------+--+
|       it       |               url               |         _c2          |
+----------------+---------------------------------+----------------------+--+
| 192.168.111.3  | http://www.edu360.cn/register   | 2017-08-06 15:35:20  |
| 192.168.133.3  | http://www.edu360.cn/register   | 2017-08-06 15:30:20  |
| 192.168.33.25  | http://www.edu360.cn/job        | 2017-08-06 15:40:20  |
| 192.168.33.3   | http://www.edu360.cn/stu        | 2017-08-05 15:30:20  |
| 192.168.33.3   | http://www.edu360.cn/teach      | 2017-08-04 15:35:20  |
| 192.168.33.36  | http://www.edu360.cn/excersize  | 2017-08-06 16:30:20  |
| 192.168.33.4   | http://www.edu360.cn/job        | 2017-08-04 16:30:20  |
| 192.168.33.4   | http://www.edu360.cn/stu        | 2017-08-04 15:30:20  |
| 192.168.33.44  | http://www.edu360.cn/stu        | 2017-08-05 15:30:20  |
| 192.168.33.46  | http://www.edu360.cn/excersize  | 2017-08-06 16:30:20  |
| 192.168.33.46  | http://www.edu360.cn/job        | 2017-08-05 16:30:20  |
| 192.168.33.5   | http://www.edu360.cn/job        | 2017-08-04 15:40:20  |
| 192.168.33.55  | http://www.edu360.cn/job        | 2017-08-06 15:40:20  |
| 192.168.34.44  | http://www.edu360.cn/pay        | 2017-08-06 15:30:20  |
| 192.168.44.3   | http://www.edu360.cn/teach      | 2017-08-05 15:35:20  |
+----------------+---------------------------------+----------------------+--+
15 rows selected (1.597 seconds)

  3.3 分组聚合练习二

-- 求8月4号以后,每天http://www.edu360.cn/job的总访问次数,及访问者中ip地址中最大的


select dt,'http://www.edu360.cn/job',count(1),max(it)
from t_access
where url='http://www.edu360.cn/job'
group by dt having dt>'2017-08-04';


select dt,max(url),count(1),max(ip)
from t_access
where url='http://www.edu360.cn/job'
group by dt having dt>'2017-08-04';


select dt,url,count(1),max(ip)
from t_access
where url='http://www.edu360.cn/job'
group by dt,url having dt>'2017-08-04';



select dt,url,count(1),max(ip)
from t_access
where url='http://www.edu360.cn/job' and dt>'2017-08-04'
group by dt,url;

+-------------+---------------------------+------+----------------+--+
|     dt      |            _c1            | _c2  |      _c3       |
+-------------+---------------------------+------+----------------+--+
| 2017-08-05  | http://www.edu360.cn/job  | 2    | 192.168.33.55  |
| 2017-08-06  | http://www.edu360.cn/job  | 3    | 192.168.33.55  |
+-------------+---------------------------+------+----------------+--+
2 rows selected (1.609 seconds)

-- 求8月4号以后,每天每个页面的总访问次数,及访问者中ip地址中最大的,且,只查询出总访问次数>2 的记录
-- 方式1:
select dt,url,count(1) as cnts,max(ip)
from t_access
where dt>'2017-08-04'
group by dt,url having cnts>2;


-- 方式2:用子查询
select dt,url,cnts,max_ip
from
(select dt,url,count(1) as cnts,max(ip) as max_ip
from t_access
where dt>'2017-08-04'
group by dt,url) tmp
where cnts>2;


+----------------+---------------------------------+-----------------------+--------------+--+
|  t_access.ip   |          t_access.url           | t_access.access_time  | t_access.dt  |
+----------------+---------------------------------+-----------------------+--------------+--+

| 192.168.33.46  | http://www.edu360.cn/job        | 2017-08-05 16:30:20   | 2017-08-05   |
| 192.168.33.55  | http://www.edu360.cn/job        | 2017-08-05 15:40:20   | 2017-08-05   |
| 192.168.33.55  | http://www.edu360.cn/job        | 2017-08-06 15:40:20   | 2017-08-06   |
| 192.168.33.25  | http://www.edu360.cn/job        | 2017-08-06 15:40:20   | 2017-08-06   |
| 192.168.33.55  | http://www.edu360.cn/job        | 2017-08-06 15:40:20   | 2017-08-06   |
+----------------+---------------------------------+-----------------------+--------------+

  

猜你喜欢

转载自www.cnblogs.com/tashanzhishi/p/10901234.html