pig基本语法——group cogroup

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/wild46cat/article/details/82728491

主要参考:

https://book.itxueyuan.com/3b7D/open

https://book.itxueyuan.com/3b7D/6AGg

基础数据

# cat /root/xytest/pig/data/demodata

xiaoxiao,12,12.1

aaa,13,1.1

kjkj,12,12.1

ddf,19,12.8

group by(按指定字段分组,结果的每一行由分组键和该组全部记录组成的 bag 构成)

grunt> A = load '/root/xytest/pig/data/demodata' using PigStorage(',') as (name:chararray,age:int,gpa:float);

grunt> B = group A by age;

grunt> dump B;

(12,{(kjkj,12,12.1),(xiaoxiao,12,12.1)})

(13,{(aaa,13,1.1)})

(19,{(ddf,19,12.8)})

group 也支持按多个字段分组,此时分组键是由这些字段组成的元组

grunt> D = group A by (age,name);

grunt> dump D;

((12,kjkj),{(kjkj,12,12.1)})

((12,xiaoxiao),{(xiaoxiao,12,12.1)})

((13,aaa),{(aaa,13,1.1)})

((19,ddf),{(ddf,19,12.8)})

============cogroup==================

数据(注意:此处 demodata 的 gpa 列带有 f 后缀,与前文列出的内容略有不同;从下面的运行结果看,按 float 加载后仍被解析为 12.1、1.1 等数值):

[root@cdh1 data]# cat demodata

xiaoxiao,12,12.1f

aaa,13,1.1f

kjkj,12,12.1f

ddf,19,12.8f

[root@cdh1 data]# cat demodata2

xiaoxiao,99,aaaaaaaaaaaa

aaa,88,bbbbbbbbbbb

kjkj,77,ccccccccccc

ddf,66,dddddddddd

grunt> A = load '/root/xytest/pig/data/demodata' using PigStorage(',') as (name:chararray,age:int,gpa:float);

grunt> B = load '/root/xytest/pig/data/demodata2' using PigStorage(',') as (name:chararray,score:int,address:chararray);

grunt> C = cogroup A by name,B by name;

grunt> dump C;

运行结果:

(aaa,{(aaa,13,1.1)},{(aaa,88,bbbbbbbbbbb)})

(ddf,{(ddf,19,12.8)},{(ddf,66,dddddddddd)})

(kjkj,{(kjkj,12,12.1)},{(kjkj,77,ccccccccccc)})

(xiaoxiao,{(xiaoxiao,12,12.1)},{(xiaoxiao,99,aaaaaaaaaaaa)})

查看 C 的结构(schema)

grunt> describe C;

C: {group: chararray,A: {(name: chararray,age: int,gpa: float)},B: {(name: chararray,score: int,address: chararray)}}

猜你喜欢

转载自blog.csdn.net/wild46cat/article/details/82728491