HQL function--Whack-a-mole game and WordCount case analysis and implementation

1. Whac-a-mole

uid,hit,m
1,1,0
1,2,1
1,3,1
1,4,1
1,5,0
1,6,0
1,7,1
2,1,1
2,2,1
2,3,1
2,4,1
2,5,1
3,1,1
3,2,1
3,3,1
3,4,0
3,5,0
3,6,1
3,7,0
3,8,1

create table tb_ds(
      uid int, -- user name
      hit int, -- the number of whack-a-mole
      m int -- whether it hit 1, hit 0, missed
)
row format delimited fields terminated by ',' ;
load data local inpath '/ root/ds.txt' into table tb_ds;

select  * from tb_ds;

 case analysis

Query the user's maximum number of consecutive hits

--查询命中的的记录
select *
from tb_ds
where m = 1;

-- 用户分组 记录编号
select uid, hit, row_number() over (partition by uid order by hit) as rn
from tb_ds
where m = 1;

-- hit-rn 
with t1 as (select uid, hit, row_number() over (partition by uid order by hit) as rn from tb_ds where m = 1)
select *, (hit - rn) as sub
from t1

-- 分组得到每个用户的连续击中
with t1 as (select uid, hit, row_number() over (partition by uid order by hit) as rn from tb_ds where m = 1),
     t2 as (select *, (hit - rn) as sub from t1)
select uid, count(*) as hit_count
from t2
group by uid, sub

-- 用户分组得到每个用户最大次数
with t1 as (select uid, hit, row_number() over (partition by uid order by hit) as rn from tb_ds where m = 1),
    t2 as (select *, (hit-rn) as sub from t1 ),
    t3 as (select uid, count(*) as hit_count from t2 group by uid, sub )
select uid, max(hit_count) as hit_count
from t3
group by uid;

2.WordCount

 1. Create table data

 Why Studying History Matters
Studying a subject that you feel pointless is never a fun or easy task.
If you're study history, asking yourself the question "why is history important" is a very good first step.
History is an essential part of human civilization.
You will find something here that will arouse your interest, or get you thinking about the significance of history.

2.Analysis 

 

 

create table t_wc(
    line string
)row format delimited lines terminated by "\n";

load data local inpath '/root/word.txt' overwrite into table t_wc;
select * from t_wc;
-- 使用正则表达式去掉特殊符号 将 除了单词字符 及 ' 和 空格的 所有字符替换成空串
select regexp_replace(line, '[^a-zA-Z_0-9\'\\s]', "") as word_line
from t_wc;
-- 使用split进行切割 将一行数据切割为一个数组
select split(regexp_replace(line, '[^a-zA-Z_0-9\'\\s]', ""), "\\s+")
from t_wc;
-- 使用expload函数进行炸裂
select explode(split(regexp_replace(line, '[^a-zA-Z_0-9\'\\s]', ""), "\\s+")) as word
from t_wc;
-- 分组得到每个单词的次数
with t1 as (select explode(split(regexp_replace(line, '[^a-zA-Z_0-9\'\\s]', ""), "\\s+")) as word from t_wc)
select word, count(1) as word_count
from t1
group by word;

 

Guess you like

Origin blog.csdn.net/m0_53400772/article/details/130812899