总结:三剑客之awk

能够打印报表
有多种版本:New awk(nawk),GNU awk( gawk)

[root@centos7 ~]#ll `which awk`
lrwxrwxrwx. 1 root root 4 Mar 29 12:06 /usr/bin/awk -> gawk  #系统安装gawk

awk [options] 'program' file

  1. options(参数):

    1.1. -F "分隔符" 指明输入时用到的字段分隔符(不写默认空白符为分隔符)
    1.2. -v var=value 变量赋值

  2. program(程序):pattern{action statements;…},通常放在单引号中
    2.1. pattern:BEGIN(打印表头),END(实现统计)
    awk [options] 'BEGIN{action;… }pattern{action;… }END{action;… }' file
    2.2. action:print,printf

分隔符:

  1. awk执行时,由分隔符分隔的字段(域)标记$1,$2…$n称为域标识。$0为所有域,注意:此时和shell中变量$符含义不同
  2. 文件的每一行称为记录
  3. 省略action,则默认执行 print $0 的操作
[root@centos7 ~]#awk '{print $1}' /etc/fstab
####options(-F)没有,默认以空白符为分隔符(不分多少),打印第一段
#
#
#
#
#
#
#
UUID=b640a874-b15b-41d1-acc3-eb2a4e85ac9e
[root@centos7 ~]#awk '{print hello}' /etc/fstab




#####hello未加双引号时被当作变量(值为空),/etc/fstab有多少行就打印多少个空行;加上双引号后,有多少行就打印多少行hello,跟/etc/fstab内容没有关系
[root@centos7 ~]#awk '{print "hello"}' /etc/fstab
hello
hello
hello
hello
#####字符串时需加双引号,否则会识别成变量,数字可以不加双引号
[root@centos7 ~]#awk '{print 100*20}' /etc/fstab
2000
2000
2000
2000
2000
[root@centos7 ~]#awk 'BEGIN{print 100*20}'   #打印第一行(表头)
2000
[root@centos7 ~]#awk 'BEGIN{print hello}'    #引号区别

[root@centos7 ~]#awk 'BEGIN{print "hello"}'
hello
[root@centos7 ~]#awk 'BEGIN{print "number"}{print 100*20}' /etc/fstab    #打印表头
number
2000
2000
2000
[root@centos7 ~]#awk 'BEGIN{print "number"}{print 100*20}END{print "end"}' /etc/fstab    #打印表头表尾
number
2000
2000
2000
end

df取利用率
[root@centos7 ~]#df |tr -s " "|cut -d" " -f5 |cut -d% -f1 |sort -nr |head -n1
8
[root@centos7 ~]#df |tr -s " " %|cut -d% -f5 |sort -nr |head -n1
8
[root@centos7 ~]#df |egrep -o '[0-9]+%' |grep -o '[0-9]\+' |sort -nr|head -n1
8
[root@centos7 ~]#df |sed -rn 's/.* ([0-9]+)%.*/\1/p' |sort -nr |head -n1
8
[root@centos7 ~]#df |awk '{print $5}'|cut -d% -f1 |sort -nr |head -n1
8
[root@centos7 ~]#df |awk '{print $5}' |awk -F% '{print $1}' |sort -nr |head -n1
8
[root@centos7 ~]#df |awk -F " +|%" '{print $5}'|sort -nr |head -n1
8

access_log取最多IP
[root@localhost ~]#cat access_log |head -n1
172.18.118.91 - - [20/May/2018:08:09:59 +0800] "GET / HTTP/1.1" 200 912 "-" "Mozilla/4.0 (compatible; MSIE 9.0; Windows NT 5.1; Trident/5.0)"
[root@localhost ~]#awk '{print $1}' access_log |sort |uniq -c |sort -nr |head -n3
   4870 172.20.116.228
   3429 172.20.116.208
   2834 172.20.0.222
[root@localhost ~]#awk -F "[[ ]" '{print $5}' access_log  |head -n2
20/May/2018:08:09:59
20/May/2018:08:09:59
[root@localhost ~]#awk -F "[[ ]" '{print $1,$5}' access_log  |head -n2    #$1,$5之间用逗号分隔时,输出默认以空格隔开;要使用其他分隔符需用双引号括起来(如 $1"++"$5)
172.18.118.91 20/May/2018:08:09:59
172.18.118.91 20/May/2018:08:09:59
[root@localhost ~]#awk -F "[[ ]" '{print $1"++"$5}' access_log  |head -n2
172.18.118.91++20/May/2018:08:09:59
172.18.118.91++20/May/2018:08:09:59
[root@localhost ~]#awk -F "[[ ]" '{print $1"--"$5}' access_log  |head -n2
172.18.118.91--20/May/2018:08:09:59
[root@localhost ~]#awk -F "[[ ]" '{print $1"\t"$5}' access_log  |head -n2   #\t(tab键)分隔符,能自动对齐
172.18.118.91	20/May/2018:08:09:59
172.18.118.91	20/May/2018:08:09:59

passwd
[root@localhost ~]#awk -F: '{print $1}' /etc/passwd |head -n2
root
bin
###取最后一列
[root@localhost ~]#awk -F: '{print $7}' /etc/passwd |head -n2   #知道每行多少列且列数一样
/bin/bash
/sbin/nologin
[root@localhost ~]#cat /etc/passwd |rev |awk -F: '{print $1}' |rev |head -n2    
/bin/bash
/sbin/nologin
[root@localhost ~]#awk -F: '{print $NF}' /etc/passwd |head -n2  #不知道列数,引用变量
/bin/bash
/sbin/nologin

变量

变量:内置和自定义变量
-v 变量

FS(输入字段分隔符,默认为空白字符)
[root@localhost ~]#awk -F: '{print $1,$3}' /etc/passwd |head -n2
root 0
bin 1
[root@localhost ~]#awk -v FS=":" '{print $1,$3}' /etc/passwd |head -n2   #与前面效果一样,但当输出分隔符为变量时方便
root 0
bin 1
[root@localhost ~]#awk -v FS=":" '{print $1FS$3}' /etc/passwd |head -n2   #引用变量为分隔符
root:0
bin:1
[root@localhost ~]#awk -v FS=":" '{print $1":"$3}' /etc/passwd |head -n2
root:0
bin:1
[root@localhost ~]#fs=:;awk -v FS=$fs '{print $1FS$3}' /etc/passwd |head -n2
root:0
bin:1

OFS(输出字段分隔符,默认为空白字符)
[root@localhost ~]#awk -F "[[ ]" '{print $1,$5}' access_log  |head -n1
172.18.118.91 20/May/2018:08:09:59
[root@localhost ~]#awk -F "[[ ]" -v OFS=: '{print $1,$5}' access_log  |head -n1
172.18.118.91:20/May/2018:08:09:59
[root@localhost ~]#awk -F "[[ ]" -v OFS=++ '{print $1,$5}' access_log  |head -n1
172.18.118.91++20/May/2018:08:09:59
[root@localhost ~]#awk -F "[[ ]" -v OFS=-- '{print $1,$5}' access_log  |head -n1
172.18.118.91--20/May/2018:08:09:59
RS(输入记录分隔符,指定输入时的换行符)

默认记录符为换行符,以记录符之间内容为一行

[root@localhost ~]#cat awk.txt 
a,b,c
d;e,f
ggg;hhhh;xxx
yyy
[root@localhost ~]#awk -F, -v RS=";" '{print $1}' awk.txt 
a
e
hhhh
xxx
yyy
[root@localhost ~]#awk -F, -v RS=";" '{print $3}' awk.txt 
c
d

ORS(输出记录分隔符,输出时用指定符号代替换行符)
[root@localhost ~]#awk -F: -v ORS=" " '{print $1,$3}' /etc/passwd 
root 0 bin 1 daemon 2 adm 3 lp 4 sync 5 shutdown 6 halt 7 mail 8 operator 11 games 12 ftp 14 nobody 99 systemd-network 192 dbus 81 polkitd 999 sssd 998 libstoragemgmt 997 colord 996 rpc 32 gluster 995 saslauth 994 abrt 173 setroubleshoot 993 rtkit 172 radvd 75 chrony 992 qemu 107 unbound 991 ntp 38 tss 59 usbmuxd 113 geoclue 990 pulse 171 gdm 42 saned 989 rpcuser 29 nfsnobody 65534 gnome-initial-setup 988 sshd 74 avahi 70 postfix 89 tcpdump 72 wang 1000 apache 48 mandriva 1005 slackware 2002 [root@localhost ~]#


NF(字段数量)
[root@localhost ~]#awk -F: '{print NF}' /etc/passwd |head -n2  #每行有7个字段(以:分隔)
7
7
[root@localhost ~]#awk -F: '{print $NF}' /etc/passwd |head -n2  #取最后一个字段
/bin/bash
/sbin/nologin
[root@localhost ~]#awk -F: '{print $(NF-1)}' /etc/passwd |head -n2  #取倒数第二个字段
/root
/bin
[root@localhost ~]#ss -nt
State      Recv-Q Send-Q  Local Address:Port                 Peer Address:Port              
ESTAB      0      52      192.168.50.30:22                   192.168.50.1:65126              
[root@localhost ~]#ss -nt |awk -F " +|:" '{print $(NF-2)}'   #取远程IP
Address
192.168.50.1

NR(记录号,各文件统一编号)

即行号

[root@localhost ~]#awk -F: '{print NR,$1}' /etc/passwd |head -n3
1 root
2 bin
3 daemon
[root@localhost ~]#cat awk.txt 
a,b,c
d;e,f
ggg;hhhh;xxx
yyy
[root@localhost ~]#awk -F, -v RS=";" '{print NR,$1}' awk.txt 
1 a
2 e
3 hhhh
4 xxx
yyy
[root@localhost ~]#awk -F, -v RS=";" '{print NR,$2}' awk.txt 
1 b
2 f
ggg
3 
4 
[root@localhost ~]#awk -F, -v RS=";" '{print NR,$3}' awk.txt 
1 c
d
2 
3 
4 

FNR(记录号,各文件分别计数)
[root@localhost ~]#awk -F, -v RS=";" '{print NR,$3}' awk.txt awk.txt 
1 c
d
2 
3 
4 
5 c   #统一编号
d
6 
7 
8 
[root@localhost ~]#awk -F, -v RS=";" '{print FNR,$3}' awk.txt awk.txt 
1 c
d
2 
3 
4 
1 c    #重新编号
d
2 
3 
4 

FILENAME(当前文件名)
[root@localhost ~]#awk -F, -v RS=";" '{print FILENAME,$3}' awk.txt
awk.txt c
d
awk.txt 
awk.txt 
awk.txt 

ARGC(命令行参数的个数)
[root@localhost ~]#awk -F, -v RS=";" '{print ARGC}' awk.txt
2
2
2
2

ARGV(数组,保存的是命令行所给定的各参数)

参数为命令和文件

[root@localhost ~]#awk -F, -v RS=";" '{print ARGC,ARGV[0]}' awk.txt
2 awk
2 awk
2 awk
2 awk
[root@localhost ~]#awk -F, -v RS=";" '{print ARGC,ARGV[1]}' awk.txt
2 awk.txt
2 awk.txt
2 awk.txt
2 awk.txt
自定义变量
[root@localhost ~]#awk -v name=magedu '{print name}' /etc/issue
magedu
magedu
magedu
[root@localhost ~]#awk -v name=magedu '{print "name"}' /etc/issue
name
name
name
###加引号认为是字符串,不加引号则识别为变量

[root@localhost ~]#awk -v name=magedu 'BEGIN{print name}' /etc/issue
magedu
[root@localhost ~]#awk -v name=magedu 'BEGIN{name="mage";print name}' /etc/issue
mage
[root@localhost ~]#awk -v name=magedu 'BEGIN{print name;name="mage";print name}' /etc/issue
magedu
mage
####变量可以在‘’里面定义,print打印前一个定义的变量

awk [options] -f programfile file

programfile 文件,内容为引号中的内容{…}

[root@localhost ~]#cat awk.txt
{print $1,$3}
[root@localhost ~]#awk -F: -f awk.txt /etc/passwd |head -n2
root 0
bin 1
[root@localhost ~]#awk -F: '{print $1,$3}' /etc/passwd |head -n2
root 0
bin 1

printf

printf "FORMAT", item1, item2
print 默认换行 printf默认不换行,换行加\n

字符 含义 字符 含义
%c 显示ASCII码对应的字符 %s 显示字符串
%d %i 显示十进制整数 %f 显示浮点数
%e %E 显示科学计数 %u 无符号整数

修饰符

修饰符 含义 举例 解释
#[.#] 第一个#控制显示宽度,第二个#表示小数点后精度 %3.1f 小数(宽度3,1位小数)
(无) 默认右对齐 %15s 字符串(右对齐,宽度15)
- 左对齐 %-15s 字符串(左对齐,宽度15)
[root@centos7 ~]#echo "a:123.456" |awk -F: '{printf "%10s  %4.1f",$1,$2}'
         a  123.5[root@centos7 ~]#echo "a:123.456" |awk -F: '{printf "%10s  %4.1f\n",$1,$2}'
         a  123.5
[root@centos7 ~]#echo "a:123.456" |awk -F: '{printf "%10s  %4.2f\n",$1,$2}'
         a  123.46
[root@centos7 ~]#echo "a:123.456" |awk -F: '{printf "%10s  %6.2f\n",$1,$2}'
         a  123.46
[root@centos7 ~]#echo "a:123.456" |awk -F: '{printf "%-10s  %-6.2f\n",$1,$2}'
a           123.46

[root@centos7 ~]#awk -F: '{printf "%20s --> %10d\n",$1,$3}' /etc/passwd |head -n3
                root -->          0
                 bin -->          1
              daemon -->          2

[root@centos7 ~]#awk -F: 'BEGIN{print "-----------------------\n|username     |uid    |\n-----------------------\n"}{printf "| %10s | %6d |\n-------------------\n",$1,$3}' /etc/passwd
-----------------------
|username     |uid    |
-----------------------

|       root |      0 |
-------------------
|        bin |      1 |
-------------------
|     daemon |      2 |
-------------------

[root@centos7 ~]#awk -v n=-100 'BEGIN{printf "%d\n",n}'
-100
[root@centos7 ~]#awk -v n=-100 'BEGIN{printf "%u\n",n}'   #最高位为1表示负数,为0表示正数
18446744073709551516
[root@centos7 ~]#echo "obase=2;18446744073709551516" |bc
1111111111111111111111111111111111111111111111111111111110011100
##64位系统中该整数用64个二进制位(bit)表示
[root@centos7 ~]#printf "%u\n" -100  #printf内置命令
18446744073709551516
[root@centos7 ~]#printf "%-10s\n" abc
abc       
[root@centos7 ~]#printf "%10s\n" abc
       abc
[root@centos7 ~]#awk -F: '{printf "username:%20suid:%10d\n",$1,$3}' /etc/passwd |head -n2
username:                rootuid:         0
username:                 binuid:         1
[root@centos7 ~]#awk -F: '{printf "username:%-20suid:%-10d\n",$1,$3}' /etc/passwd |head -n2
username:root                uid:0         
username:bin                 uid:1   

操作符

数字运算操作符

算术操作符:
x+y, x-y, x*y, x/y, x^y, x%y
-x:转换为负数
+x:将字符串转换为数值
字符串操作符:没有符号的操作符,字符串连接

[root@centos7 ~]#awk 'BEGIN{print 2+5}'
7
[root@centos7 ~]#awk 'BEGIN{print 2*5}'
10

赋值操作符

赋值操作符:
=, +=, -=, *=, /=, %=, ^=, ++, --

[root@centos7 ~]#awk 'BEGIN{i=1;i++;print i}'
2
[root@centos7 ~]#awk 'BEGIN{i=1;++i;print i}'
2
[root@centos7 ~]#awk 'BEGIN{i=1;print i++}'
1
[root@centos7 ~]#awk 'BEGIN{i=1;print ++i}'
2
[root@centos7 ~]#awk 'BEGIN{i=1;print ++i;print i}'
2
2
[root@centos7 ~]#awk 'BEGIN{i=1;print i++;print i}'
1
2
[root@centos7 ~]#awk 'BEGIN{i=1;i+=3;print i}'
4

比较操作符

比较操作符:
==(等于), !=(不等于), >, >=, <, <=

[root@centos7 ~]#awk -F: '$3>=1000' /etc/passwd   #省略{print $0}
wang:x:1000:1000::/home/wang:/bin/bash
li:x:1001:1001::/home/li:/bin/bash
ww:x:1002:1002::/home/ww:/bin/bash
[root@centos7 ~]#awk -F: '$3>=1000{print $0}' /etc/passwd
wang:x:1000:1000::/home/wang:/bin/bash
li:x:1001:1001::/home/li:/bin/bash
ww:x:1002:1002::/home/ww:/bin/bash
wangzhang:x:1003:1003::/data:/bin/bash
[root@centos7 ~]#awk -F: '$3>=1000{print $3}' /etc/passwd
1000
1001
1002
1003



模式匹配符

模式匹配符:
~:左边是否和右边匹配,包含
!~:是否不匹配

[root@centos7 ~]#awk -F: '$1 ~ "^root"' /etc/passwd
root:x:0:0:root:/root:/bin/bash
[root@centos7 ~]#awk -F: '$0 ~ "^root"' /etc/passwd
root:x:0:0:root:/root:/bin/bash
[root@centos7 ~]#awk -F: '$0 ~ "root"' /etc/passwd   #包含
root:x:0:0:root:/root:/bin/bash
operator:x:11:0:operator:/root:/sbin/nologin
[root@centos7 ~]#awk -F: '$0 !~ "root"' /etc/passwd  #不包含
bin:x:1:1:bin:/bin:/sbin/nologin
daemon:x:2:2:daemon:/sbin:/sbin/nologin
adm:x:3:4:adm:/var/adm:/sbin/nologin
[root@centos7 ~]#awk '$0 ~ "UUID"' /etc/fstab
UUID=b640a874-b15b-41d1-acc3-eb2a4e85ac9e /                       ext4    defaults        1 1
[root@centos7 ~]#awk '$0 ~ "UUID" {print $1}' /etc/fstab
UUID=b640a874-b15b-41d1-acc3-eb2a4e85ac9e
[root@centos7 ~]#awk '$0 ~ "^UUID"' /etc/fstab
UUID=b640a874-b15b-41d1-acc3-eb2a4e85ac9e /                       ext4    defaults        1 1
逻辑操作符

逻辑操作符:与&&,或||,非!

[root@centos7 ~]#awk -F: '$3>100 && $3<=1000' /etc/passwd   #并且&&  或者||
systemd-network:x:192:192:systemd Network Management:/:/sbin/nologin
polkitd:x:999:998:User for polkitd:/:/sbin/nologin
chrony:x:998:996::/var/lib/chrony:/sbin/nologin
wang:x:1000:1000::/home/wang:/bin/bash
[root@centos7 ~]#awk -F: '2<$3 && $3<5 || $3>1013' /etc/passwd
adm:x:3:4:adm:/var/adm:/sbin/nologin
lp:x:4:7:lp:/var/spool/lpd:/sbin/nologin
system-10:x:1014:1014::/data/system-10:/bin/bash
[root@centos7 ~]#awk -F: '!($3>3)' /etc/passwd
root:x:0:0:root:/root:/bin/bash
bin:x:1:1:bin:/bin:/sbin/nologin
daemon:x:2:2:daemon:/sbin:/sbin/nologin
adm:x:3:4:adm:/var/adm:/sbin/nologin

条件表达式(三目表达式)

selector ? if-true-expression : if-false-expression

[root@centos7 ~]#awk -F: '$3>1000?username="common user":username="system user"{print username,$3}' /etc/passwd
system user 0
system user 1
common user 1013
common user 1014

pattern总结

program(程序):pattern{action statements;…},通常放在单引号中

空模式,匹配每一行
[root@centos7 ~]#awk -F: '{print $1"=="$3}' /etc/passwd
root==0
bin==1
daemon==2

正则表达式

仅处理能够模式匹配到的行,需要用/ /括起来

[root@centos7 ~]#awk -F: '/^r/,/^s/{print $1"=="$3}' /etc/passwd
root==0
bin==1
daemon==2
adm==3
lp==4
sync==5
[root@centos7 ~]#awk -F: '/^root/{print $1"=="$3}' /etc/passwd
root==0
[root@centos7 ~]#awk -F: '$0 ~ "^root" {print $1"=="$3}' /etc/passwd
root==0
[root@centos7 ~]#awk '/^UUID/' /etc/fstab 
UUID=b640a874-b15b-41d1-acc3-eb2a4e85ac9e /                       ext4    defaults        1 1
[root@centos7 ~]#df |awk -F" +|%" '/^\/dev\/vd/{print $1,$5}'
/dev/vda1 8
[root@centos7 ~]#ifconfig eth0 |awk '/netmask/{print $2}'
172.16.189.96
[root@centos7 ~]#ss -nt |awk -F" +|:" '/^ES/{print $(NF-2)}'
100.100.30.25
27.8.31.107


关系表达式,结果为“真”才会被处理

真:结果为非0值,非空字符串
假:结果为空字符串或0值

[root@centos7 ~]#awk -F: '$3>=1000{print$1,$3}' /etc/passwd
wang 1000
li 1001
ww 1002
wangzhang 1003
wangz 1004
[root@centos7 ~]#awk -v i=0 i /etc/passwd     #结果为0不打印
[root@centos7 ~]#awk -v i=0 'i{print $0}' /etc/passwd   #前一命令隐藏{print $0}
[root@centos7 ~]#awk -v i=1 i /etc/passwd    #结果为1打印
root:x:0:0:root:/root:/bin/bash
bin:x:1:1:bin:/bin:/sbin/nologin
[root@centos7 ~]#awk -v i=1 'i{print $0}' /etc/passwd
root:x:0:0:root:/root:/bin/bash
bin:x:1:1:bin:/bin:/sbin/nologin


[root@centos7 ~]#awk -v i="" i /etc/passwd   #结果为空不打印
[root@centos7 ~]#awk -v i="" 'i{print $0}' /etc/passwd
[root@centos7 ~]#awk -v i=" " i /etc/passwd   #结果不为空打印
root:x:0:0:root:/root:/bin/bash
bin:x:1:1:bin:/bin:/sbin/nologin
[root@centos7 ~]#awk -v i=" " 'i{print $0}' /etc/passwd
root:x:0:0:root:/root:/bin/bash
bin:x:1:1:bin:/bin:/sbin/nologin
######非0非空为真######
[root@centos7 ~]#awk -F: 'i=0,i{print $0}' /etc/passwd   #变量赋值放在里面,上面命令放在外面
[root@centos7 ~]#awk -F: 'i="",i{print $0}' /etc/passwd
[root@centos7 ~]#awk -F: 'i=" ",i{print $0}' /etc/passwd
root:x:0:0:root:/root:/bin/bash
bin:x:1:1:bin:/bin:/sbin/nologin
[root@centos7 ~]#awk -F: 'i=1000,i{print $0}' /etc/passwd
root:x:0:0:root:/root:/bin/bash
bin:x:1:1:bin:/bin:/sbin/nologin

[root@centos7 ~]#seq 4 |awk '{print i}'    #i没有赋值为空




[root@centos7 ~]#seq 4 |awk '{print !i}'   #取反为1为真
1
1
1
1

[root@centos7 ~]#seq 4 |awk '++i'    #++i先加再处理,i没有赋值为0,加后为1为真打印结果
1
2
3
4
[root@centos7 ~]#seq 4 |awk 'i++'     #i++先处理再加,i没有赋值为0为假不打印结果,后面加后为真打印
2
3
4
[root@centos7 ~]#seq 4 |awk 'i=0'
[root@centos7 ~]#seq 4 |awk 'i=1'
1
2
3
4
[root@centos7 ~]#seq 4 |awk 'i=!i'   #i=!i:i未赋值时为空(假),取反后为1(真),打印该行;下一行i为1,对其取反为0(假),不打印;如此交替,结果打印奇数行
1
3
[root@centos7 ~]#seq 4 |awk '!(i=!i)'
2
4
[root@centos7 ~]#seq 4 |awk -v i=1 'i=!i'   #相当于前面命令
2
4
[root@centos7 ~]#seq 4 |awk -v i=0 'i=!i'
1
3

行范围

/pat1/,/pat2/ 不支持直接给出数字格式

[root@centos7 ~]#seq 10 |awk '/5/,/7/'
5
6
7
[root@centos7 ~]#seq 10 |awk 'NR>=5 && NR<=7'   #相当于前一命令,NR为行记录
5
6
7
[root@centos7 ~]#seq 10 |awk 'NR==5 && NR==7'   #比较为==,=为变量赋值
[root@centos7 ~]#seq 10 |awk 'NR==5 || NR==7'
5
7

BEGIN/END模式

BEGIN{}:仅在开始处理文件中的文本之前执行一次
END{}:仅在文本处理完成之后执行一次

[root@centos7 ~]#seq 10|awk 'BEGIN{print "hang num"}{print NR"\t"$0}END{print "pingjunshu"}'
hang num
1	1
2	2
3	3
4	4
5	5
6	6
7	7
8	8
9	9
10	10
pingjunshu
[root@centos7 ~]#awk -F: 'BEGIN{print "user:uid"}/^root/,/^adm/{print $1":"$3}END{print "END FILE"}' /etc/passwd
user:uid
root:0
bin:1
daemon:2
adm:3
END FILE

action函数用法

if

语法:if(condition){statement;…}[else statement]
if(condition1){statement1}else if(condition2){statement2}else{statement3}
使用场景:对awk取得的整行或某个字段做条件判断

[root@centos7 ~]#awk -F: '{if($3>=1000)print $1,$3}' /etc/passwd
wang 1000
li 1001
[root@centos7 ~]#awk -F: '$3>=1000{print $1,$3}' /etc/passwd   #与上一条命令等价;if等控制语句必须写在{}里面,而模式(pattern)写在{}外面
wang 1000
li 1001
[root@centos7 ~]#seq 10|awk 'i=!i'
1
3
5
7
9
[root@centos7 ~]#seq 10|awk '{if (NR%2==1)print $0}'
1
3
5
7
9
[root@centos7 ~]#seq 4|awk '{if (NR%2==1)print "奇数行",$0;else{print "偶数行",$0}}'
奇数行 1
偶数行 2
奇数行 3
偶数行 4
[root@centos7 ~]#seq 4|awk '{if (NR%2==1){print "奇数行",$0}else{print "偶数行",$0}}'
奇数行 1
偶数行 2
奇数行 3
偶数行 4

[root@centos7 ~]#df |awk -F " +|%" '/^\/dev\/vd/{if($5>=5)print $1"will be full used: "$5}'
/dev/vda1will be full used: 8
[root@centos7 ~]#awk -F: '{if($3>=1000) {printf "Common user: %s\n",$1} else {printf "root or Sysuser: %s\n",$1}}' /etc/passwd
root or Sysuser: root
root or Sysuser: bin
[root@centos7 ~]#awk -F: '{if($3>=1000)printf "Common user: %s\n",$1;else {printf "root or Sysuser: %s\n",$1}}' /etc/passwd
root or Sysuser: root
root or Sysuser: bin

while

语法:while(condition){statement;…}
条件“真”,进入循环;条件“假”,退出循环
使用场景:
对一行内的多个字段逐一类似处理

[root@centos7 ~]#awk '/linux16/{i=1;while(i<=NF){print $i,length($i);i++}}' /boot/grub2/grub.cfg    #统计单词字数  length 自带函数
linux16 7
/boot/vmlinuz-3.10.0-1062.18.1.el7.x86_64 41
root=UUID=b640a874-b15b-41d1-acc3-eb2a4e85ac9e 46
[root@centos7 ~]#awk 'BEGIN{i=1;sum=0;while (i<=100){sum+=i;i++};print "sum="sum}'
sum=5050
[root@centos7 ~]#awk 'BEGIN{i=1;while (i<=100){sum+=i;i++};print "sum="sum}'
sum=5050

for

语法:for(expr1;expr2;expr3) {statement;…}
常见用法:
for(variable assignment;condition;iteration process)
{for-body}
特殊用法:能够遍历数组中的元素
语法:for(var in array) {for-body}

[root@centos7 ~]#awk 'BEGIN{sum=0;for(i=1;i<=100;i++){sum+=i};print "sum="sum}'
sum=5050
[root@centos7 ~]#awk 'BEGIN{for(i=1;i<=100;i++){sum+=i};print "sum="sum}'
sum=5050
[root@centos7 ~]#awk '/linux16/{for(i=1;i<=NF;i++){print $i,length($i)}}' /boot/grub2/grub.cfg 
linux16 7
/boot/vmlinuz-3.10.0-1062.18.1.el7.x86_64 41
root=UUID=b640a874-b15b-41d1-acc3-eb2a4e85ac9e 46
ro 2
[root@centos7 ~]#sum=0;for((i=1;i<=100;i++))do let sum+=i;done; echo $sum
5050
[root@centos7 ~]#seq -s+ 100 |bc
5050
[root@centos7 ~]#awk 'BEGIN{for(i=1;i<=100;i++){sum+=i};print "sum="sum}'
sum=5050

[root@centos7 ~]#time (sum=0;for((i=1;i<=100;i++))do let sum+=i;done; echo $sum)
5050

real	0m0.001s
user	0m0.001s
sys	0m0.000s
[root@centos7 ~]#time (sum=0;for((i=1;i<=1000000;i++))do let sum+=i;done; echo $sum)
500000500000

real	0m7.442s
user	0m6.713s
sys	0m0.699s
[root@centos7 ~]#time (seq -s+ 1000000 |bc)
500000500000

real	0m0.407s
user	0m0.381s
sys	0m0.021s
[root@centos7 ~]#time(total=0;for i in {1..1000000};do let total+=i;done;echo $total)
500000500000

real	0m5.138s
user	0m4.729s
sys	0m0.379s

####for循环最慢awk最快
continue

中断本次循环,后续不变

[root@centos7 ~]#awk 'BEGIN{for(i=0;i<=10;i++){if(i==5)continue;print i}}'
0
1
2
3
4
6
7
8
9
10

break

中断本次及后续循环

[root@centos7 ~]#awk 'BEGIN{for(i=0;i<=10;i++){if(i==5)break;print i}}'
0
1
2
3
4
next

提前结束对本行处理而直接进入下一行处理(awk自身循环)

[root@centos7 ~]#seq 10 |awk '{if(NR==5)next;print $0}'
1
2
3
4
6
7
8
9
10
###对满足条件的行不处理

数组

若要遍历数组中的每个元素,要使用for循环
for(var in array) {for-body}
关联数组:array[index-expression]
index-expression:
•(1) 可使用任意字符串;字符串要使用双引号括起来
•(2) 如果某数组元素事先不存在,在引用时,awk会自动创建此元素,并将其值初始化为“空串”
•(3) 若要判断数组中是否存在某元素,要使用“index in array”格式进行判断

[root@centos7 ~]#awk 'BEGIN{title["ceo"]="mage";title["coo"]="wangge";title["cto"]="zhange";print title["coo"]}'
wangge

#####数组遍历
[root@centos7 ~]#cat awk.txt 
aa
bb
aa
bb
bb
cc
aa
[root@centos7 ~]#awk '!a[$0]++' awk.txt   #打印第一次
aa
bb
cc
#第一行带入a[aa]为空,取反为非空为真打印,a[aa]值为1,后续再输入a[aa]为1取反为假不打印再加值为2
[root@centos7 ~]#awk 'a[$0]++' awk.txt   #打印重复出现的行(第二次及以后出现时才打印)
aa
bb
bb
aa
[root@centos7 ~]#awk '!a[$0]++ {print $0,a[$0]}' awk.txt   #显示数组
aa 1
bb 1
cc 1
[root@centos7 ~]#awk 'a[$0]++ {print $0,a[$0]}' awk.txt 
aa 2
bb 2
bb 3
aa 3
 

[root@centos7 ~]#awk 'BEGIN{title["ceo"]="mage";title["coo"]="wange";title["cto"]="zhange";for(i in title){print i,title[i]}}'
coo wange
ceo mage
cto zhange

[root@localhost ~]#awk '{ip[$1]++}END{for(i in ip){print i,ip[i]}}' access_log 
172.20.0.200 1482
172.20.21.121 2
172.20.30.91 29
172.16.102.29 864
[root@localhost ~]#awk '{ip[$1]++}END{for(i in ip){print i,ip[i]}}' access_log |sort -k2 -nr
172.20.116.228 4870
172.20.116.208 3429
172.20.0.222 2834

[root@localhost ~]#cat ss.log 
State      Recv-Q Send-Q        Local Address:Port          Peer Address:Port 
ESTAB      0      0            123.57.218.140:80           210.21.36.228:17036 
ESTAB      0      0                 127.0.0.1:55388            127.0.0.1:27017 
ESTAB      0      0            123.57.218.140:22         101.200.188.230:42002 
ESTAB      0      96           123.57.218.140:22          61.149.193.234:50314 

[root@localhost ~]#awk '!/State/{state[$1]++}END{for(i in state){print i,state[i]}}' ss.log 
ESTAB 108
FIN-WAIT-1 1
LAST-ACK 3

[root@localhost ~]#awk -F " +|:" '!/State/{state[$(NF-2)]++}END{for(i in state)print i,state[i]}' ss.log |sort -k2 -nr
127.0.0.1 44
113.234.28.244 10
124.64.18.135 8

函数

数值处理

rand():返回0和1之间一个随机数
int():取整数

[root@centos7 ~]#awk 'BEGIN{print rand()*100}'
23.7788
[root@centos7 ~]#awk 'BEGIN{print int(rand()*100)}'
23
[root@centos7 ~]#awk 'BEGIN{print rand()}'
0.237788
[root@localhost ~]# awk 'BEGIN{srand(); for (i=1;i<=10;i++)print int(rand()*100) }'
47
97
94
60
43
55
70
56
88
80

字符串处理

length([s]):返回指定字符串的长度
sub(r,s,[t]):对t字符串搜索r表示模式匹配的内容,并将第一个匹配内容替换为s

[root@localhost ~]# echo "2008:08:08 08:08:08" | awk 'sub(/:/,"-",$1)'
2008-08:08 08:08:08
[root@localhost ~]# echo "2008:08:08 08:08:08" | awk '{sub(/:/,"-",$1);print $0}'  #表示sub直接修改了$1字段(内存中的记录),$0随之改变,并不会改动原文件
2008-08:08 08:08:08

gsub(r,s,[t]):对t字符串进行搜索r表示的模式匹配的内容,并全部替换为s所表示的内容

[root@localhost ~]# echo "2008:08:08 08:08:08" | awk 'gsub(/:/,"-",$0)'
2008-08-08 08-08-08
[root@localhost ~]# echo "2008:08:08 08:08:08" | awk '{gsub(/:/,"-",$0);print $0}'
2008-08-08 08-08-08

split(s,array,[r]):以r为分隔符,切割字符串s,并将切割后的结果保存至array所表示的数组中,第一个索引值为1,第二个索引值为2,…

[root@localhost ~]# netstat -tn | awk '/^tcp\>/{split($5,ip,":");count[ip[1]]++}END{for (i in count) {print i,count[i]}}'
192.168.20.1 1
[root@localhost ~]# ss -nt
State       Recv-Q Send-Q                                                                  Local Address:Port                                                                                 Peer Address:Port              
ESTAB       0      52                                                                      192.168.20.10:22                                                                                   192.168.20.1:50641              
[root@localhost ~]# ss -nt |awk '{split($NF,ip,":");count[ip[1]]++}END{for(i in count){print i,count[i]}}' 
192.168.20.1 1
Address 1
[root@localhost ~]# ss -nt |awk '/^ES/{split($NF,ip,":");count[ip[1]]++}END{for(i in count){print i,count[i]}}' 
192.168.20.1 1

自定义函数

function name ( parameter, parameter, … ) {
statements
return expression
}
#()里面为形式参数(形参),调用时按位置依次传入实参,类似于shell函数中的$1,$2……

[root@localhost ~]# cat fun.wak 
function max(x,y) {
	x>y?var=x:var=y    #条件表达式(三目表达式)
	return var
}
BEGIN{a=3;b=2;print max(a,b)}
[root@localhost ~]# awk -f fun.wak 
3

[root@localhost ~]# cat fun.wak 
function max(x,y) {
	x>y?var=x:var=y
	return var
}
BEGIN{print max(a,b)}
[root@localhost ~]# awk -v a=20 -v b=30 -f fun.wak
30
[root@localhost ~]# awk -v a=40 -v b=30 -f fun.wak
40


调用shell命令

[root@localhost ~]# awk 'BEGIN{system("hostname") }'
localhost.localdomain
[root@localhost ~]# awk 'BEGIN{score=100; system("echo your score is " score) }'
your score is 100
[root@localhost ~]# ls
anaconda-ks.cfg  fun.wak  initial-setup-ks.cfg
[root@localhost ~]# awk 'BEGIN{system("rm -f fun.wak")}'    #删除文件fun.wak
[root@localhost ~]# ls
anaconda-ks.cfg  initial-setup-ks.cfg

脚本

[root@localhost ~]# cat f1.awk 
{if($3>=1000)print $1,$3}
[root@localhost ~]# awk -F: -f f1.awk /etc/passwd
nfsnobody 65534
wang 1000

[root@localhost ~]# cat f1.awk 
#!/bin/awk -f
{if($3>=1000)print $1,$3}
[root@localhost ~]# chmod +x f1.awk 
[root@localhost ~]# ./f1.awk -F: /etc/passwd

作业
[root@localhost ~]#cat test.txt 
1 blog.magedu.com
2 www.magedu.com
[root@localhost ~]#awk -F "[. ]" '{print $2}' test.txt 
blog
www
##随机的整数共5000个,存储的格式100,50,35,89…请取出其中最大和最小的整数
[root@localhost ~]#for((i=1;i<=5000;i++));do echo $RANDOM >>random.txt  ;done
[root@localhost ~]#cat random.txt |tr "\n" "," >random1.txt 
[root@localhost ~]#cat random1.txt 
7672,23370,426,5156,2371,17185,26924,2953,28038,15187,26863,
[root@centos7 ~]#awk -F, '{i=1;max=$i;min=$i;while(i<=NF){if($i>max)max=$i;else if($i<min)min=$i;i++}}END{print "min="min,"max="max}' random1.txt 
min=26 max=32766
##将以下文件内容中FQDN取出并根据其进行计数从高到低排序
[root@centos7 ~]#cat fqdn.log 
http://mail.magedu.com/index.html
http://www.magedu.com/test.html
http://study.magedu.com/index.html
http://blog.magedu.com/index.html
http://www.magedu.com/images/logo.jpg
http://blog.magedu.com/20080102.html
[root@centos7 ~]#awk -F/ '{fqdn[$3]++}END{for(i in fqdn)print fqdn[i],i}' fqdn.log |sort -nr
2 www.magedu.com
2 blog.magedu.com
1 study.magedu.com
1 mail.magedu.com
##计算男女学生的平均成绩
[root@centos7 ~]#cat score.txt 
姓名 性别 成绩
wang 男 100
li 女 99
wan 男 88
zhang 女 96
tong 男 86
ni 女 78
[root@centos7 ~]#awk '{if($2=="男"){m_sum+=$3;m_num++}else if($2=="女"){f_sum+=$3;f_num++}}END{print "男生平均成绩:"m_sum/m_num,"女生平均成绩:"f_sum/f_num}' score.txt 
男生平均成绩:91.3333 女生平均成绩:91
[root@centos7 ~]#awk 'NR!=1{sum[$2]+=$3;num[$2]++}END{for(i in sum){print i,sum[i]/num[i]}}' score.txt 
男 91.3333
女 91


[root@centos7 ~]#echo 'Yd$C@M05MB%9&Bdh7dq+YVixp3vpw' |awk -F "[[:alpha:]]" -v RS="[%&]" '{i=1;while(i<=NF){if($i ~ "[0-9]"){print $i};i++}}'
05
9
7
3

猜你喜欢

转载自blog.csdn.net/wauzy/article/details/106418432