ETL常用SQL语句

范例及解析

-- Create the table only if it does not already exist.
CREATE TABLE IF NOT EXISTS DWS_table (
    domain_code   string,
    province_code string,
    -- LOCAL is a reserved keyword in Hive, so the column name is backtick-quoted.
    `local`       decimal(22,2)
)
-- Partition the table by data_dt.
PARTITIONED BY (
    data_dt string
);

-- DROP removes the table together with its data. By contrast, TRUNCATE only
-- empties the table and DELETE only removes the rows it is asked to remove;
-- both leave the table definition in place.
drop table if exists RISK_tmp;

-- CREATE TABLE ... AS SELECT builds the table directly from a query result.
-- Clause order inside a SELECT is:
--   WHERE -> GROUP BY -> HAVING -> ORDER BY -> LIMIT
create table RISK_tmp as
select
    test1,
    test2,
    test3
from ods_out
where data_dt = '${yyyymmdd}'
group by
    test1,
    test2,
    test3;
-- Load one partition of DWS_table. OVERWRITE replaces the data already in the
-- target partition; INTO would append to it instead.
-- NOTE(review): the select list must match the target table's non-partition
-- columns in number and order — confirm against the DWS_table definition.
INSERT OVERWRITE TABLE DWS_table
-- Partition that receives the rows.
PARTITION (data_dt = '${yyyymmdd}')
SELECT
     T1.test1                 AS test1_code
     -- || concatenates the two columns directly.
    ,T1.PRCTR || T1.NAME      AS profitcode
     -- Empty-string placeholder.
    ,''                       AS regroup
     -- substr(str, start, length): substr("Monday", 1, 3) returns "Mon".
    ,substr(T1.text, 1, 4)    AS year
     -- Window functions: OVER (PARTITION BY <grouping columns> ORDER BY <sort columns>).
     --   row_number: sequential row number within the partition after sorting
     --   rank:       ties share a rank; the next rank skips by the number of ties
     --   dense_rank: ties share a rank; the next rank does not skip
    ,ROW_NUMBER() OVER (PARTITION BY sap_code ORDER BY sap_profits DESC)  AS row_num
     -- CASE expression: 1 when the LEFT JOIN found a matching T2 row, otherwise 0.
     -- NOTE(review): this previously tested T4.sap_supplier_code, but no alias T4
     -- exists in this query; T2's join key is used instead — confirm the intent.
    ,CASE
         WHEN T2.domain_code IS NOT NULL THEN 1
         ELSE 0
     END                      AS is_test
     -- NULL placeholder.
    ,NULL                     AS name
FROM ods_out T1
LEFT JOIN (
    SELECT
        domain_code,
        province_code
    FROM biz_test2
    WHERE domain_code IN ('a', 'b', 'c')
    GROUP BY domain_code, province_code
) T2
    -- NOTE(review): T2 only exposes domain_code and province_code, so the join
    -- uses those columns — confirm they are the intended join keys.
    ON  T1.test  = T2.domain_code
    AND T1.text2 = T2.province_code
-- In LIKE patterns, % matches any run of characters and _ matches exactly one.
-- A query may contain only one WHERE clause, so both filters are combined with
-- AND; the parentheses keep the OR condition grouped.
WHERE (company_size LIKE '小%'
       OR company_size LIKE '_微%')
  AND SUBSTR(T1.BUDAT, 1, 6) = SUBSTR('${yyyymmdd}', 1, 6)   -- rows of the same month only
;

增删改查

-- Insert a single row, naming the target columns explicitly.
INSERT INTO dws_test (data_dt, sys_id)
VALUES (2022, 01);
-- Insert a single row into a specific partition.
INSERT INTO dws_test PARTITION (data_dt = 2022) (sys_id)
VALUES (01);
-- Add a column. The column comment must be a string literal in straight single
-- quotes; typographic quotes are not valid SQL.
alter table tableName add columns (newcol1 int comment '新增');

-- CHANGE renames a column and/or gives it a new type; it does not drop the column.
ALTER TABLE tableName
    CHANGE col_name new_col_name new_type;
-- DELETE removes the rows that match the WHERE condition.
DELETE
FROM table_name
WHERE some_column = some_value;
-- Drop the table: removes both its definition and its data.
DROP TABLE table_name;
-- Truncate the table: removes every row but keeps the table definition.
TRUNCATE TABLE table_name;

-- Update the listed columns on every row that matches the WHERE condition.
update table_name
set
    column1 = value1,
    column2 = value2
where some_column = some_value;

-- Redefine the table's column list in one statement.
ALTER TABLE tableName
    REPLACE COLUMNS (
        col1 int,
        col2 string,
        col3 string
    );
-- Rename a table: switch to its database first, then rename.
USE xxxdb;
ALTER TABLE table_name
    RENAME TO new_table_name;

-- Query template showing the required clause order:
--   WHERE -> GROUP BY -> HAVING -> ORDER BY
-- A query has a single WHERE clause, so all row filters are combined with AND.
-- XXX / xx / xxx are placeholders; in a real grouped query the select list may
-- contain only grouping columns and aggregates.
select *
from XXX
where xx = xx
  and xx like 'N%'
group by xxx
having xx > 100
order by xx desc;

-- Print the CREATE TABLE statement of a table.
SHOW CREATE TABLE XXX;
-- Show the table structure (DESC is shorthand for DESCRIBE).
DESCRIBE XXX;

增删主键

-- Drop the primary key (syntax used by e.g. MySQL).
alter table testDB.risk_pay drop primary key;
-- Drop the primary key by its constraint name (syntax used by e.g. PostgreSQL).
alter table testDB.risk_pay drop constraint risk_pkey1;
-- Add a primary key on column risk_pay.
-- Every statement ends with a semicolon so it cannot run into the next one.
alter table testDB.risk_pay add primary key (risk_pay);

增删非空限制

-- Remove the NOT NULL constraint from a column.
ALTER TABLE xxx
    ALTER COLUMN xxx DROP NOT NULL;
-- Put a NOT NULL constraint on a column.
ALTER TABLE xxx
    ALTER COLUMN xxx SET NOT NULL;

小知识

可编辑

-- Make the table editable by setting its replica identity to FULL.
-- NOTE(review): presumably needed so UPDATE/DELETE are accepted on a table
-- without a primary key that is part of a logical-replication publication — confirm.
alter table XXX.xxx
    replica identity full;

随机数

-- Build a label such as a355: the letter 'a' followed by a random integer in
-- [100, 999]. FLOOR(RANDOM() * 900) yields 0..899. The parentheses make explicit
-- that 100 is added before the concatenation.
SELECT 'a' || (FLOOR(RANDOM() * 900) + 100);

-- CEIL rounds up, so this variant yields an integer in [100, 1000].
SELECT CEIL(RANDOM() * 900 + 100);

从多个字符串中随机取数

-- Pick one of four strings at random.
-- FLOOR(RANDOM() * 4) yields 0, 1, 2 or 3, so every branch is reachable and
-- each string is chosen with equal probability (0 falls through to ELSE).
SELECT CASE FLOOR(RANDOM() * 4)
           WHEN 1 THEN 'x'
           WHEN 2 THEN 'y'
           WHEN 3 THEN 'z'
           ELSE 'a'
       END AS random_pick;

union 与 union all(并集)

两个select列数必须相同,拼接后字段数不变
union自动去重,union all全部显示即行数相加

-- Create union_test from the de-duplicated union of the two tables.
CREATE TABLE union_test AS
SELECT *
FROM t_user
UNION
SELECT *
FROM user_test;

时间获取

-- First day of the current year as yyyyMMdd, e.g. 20230101.
-- The suffix is a quoted string: an unquoted 0101 is a number and would lose
-- its leading zero.
SELECT CONCAT(from_unixtime(unix_timestamp(), 'yyyy'), '0101') AS first_day_of_year;

-- First day of the current month as yyyyMMdd, e.g. 20230201.
SELECT CONCAT(from_unixtime(unix_timestamp(), 'yyyyMM'), '01') AS first_day_of_month;

脱敏处理

将账号第 5–15 位上的数字替换为 *

-- Mask the account number: keep characters 1-4, replace every digit in
-- positions 5-15 (11 characters) with '*', and keep everything from position 16 on.
-- The 'g' flag (PostgreSQL) makes regexp_replace replace every match, not just the first.
-- NOTE(review): there is no WHERE clause, so every row is updated — confirm that is intended.
UPDATE risk_todo_pool_monitor
SET account = substr(account, 1, 4)
           || regexp_replace(substr(account, 5, 11), '[0-9]', '*', 'g')
           || substr(account, 16);

猜你喜欢

转载自blog.csdn.net/qq_43605229/article/details/129003165