文本处理三兄弟（grep,sed,awk）

1.基本元字符

正则表达式	描述	示例
^	行起始标志	^tux 匹配以tux起始的行
$	行尾标志	tux$ 匹配以tux结尾的行
.	匹配任意字符	hack. 匹hackl和hacki，但是不能匹配hackl2和hackil,它只能匹配单个字符
[]	匹配包含[字符]之中的任意一个字符	coo[k1]匹配 cook和 cool
[^]	匹配除[^字符]之外的任意一个字符	9[^01]匹配92，93，但是不匹配91或90
[-]	匹配[]中指定范围内的任意一个字符	[1-5]匹配从1～5的任意一个数字
?	匹配之前的项 0次或者 1次	Colou?r 匹配color 或者colour,但是不能匹配colouur
+	匹配之前的项1次或多次	Rollno-9+ 匹配rollno-99,rollno-9,但是不能匹配rollno-
*	匹配之前的项0次或多次	Co*l 匹配cl,col.coool等
（）	创建一个用于匹配的子串	ma（tri）? 匹配max或maxtrix
{n}	匹配之前的项n次	[0-9]{3}匹配任意一个三位数，[0-9]{3}可以扩展为[0-9][0-9][0-9]
{n,}	之前的项至少需要匹配n次	[0-9]{2,}匹配任意一个两位和更多位的数字
{n,m}	指定之前的项所必须匹配的最小次数和最大次数	[0-9]{2,5}匹配从两位数到五位数之间的任意一个数字
\|	交替–匹配两边的任意一项	Oct (1st \| 2nd ) 匹配 oct 1st 和 2nd
\	转义符可以将上面介绍的特殊字符进行转义	a.b 匹配a.b,.通过在.之间加上前缀,从而忽略了.的特殊意义
^$	以空开头，以空结尾	过滤空行

2.grep 和 egrep (文本处理和批处理)

grep -E = egrep
grep       -i       # 忽略字母大小写
grep       -v       #  -v 表示反向过滤
grep       -c	       # 统计匹配行数
grep 	      -q   	    # 静默，无任何输出
grep	       -n	       # 显示匹配结果所在的行号

[root@base1 mnt]#  grep '172.25.254.178' /etc/hosts && echo 'YES' || echo 'NO'172.25.254.178 www.westos.com news.westos.com music.westos.com
YES
[root@base1 mnt]# grep -q '172.25.254.178' /etc/hosts && echo 'YES' || echo 'NO' 
YES

在这里插入图片描述

[root@base1 mnt]# cp /etc/passwd /mnt
[root@base1 mnt]# egrep -c '/sbin/nologin' passwd  # 统计匹配的行数
42
[root@base1 mnt]# grep ^root passwd    # 过滤以root开头的行
root:x:0:0:root:/root:/bin/bash
[root@base1 mnt]# grep root$ passwd     # 过滤以root结尾的行
[root@base1 mnt]# grep -i ^root passwd   # 不区分大小写,过滤以root开头的行
root:x:0:0:root:/root:/bin/bash
[root@base1 mnt]# grep -E  "^root|root$"  passwd #过滤以root开头或者以root结尾的行，-E表示扩展正则表达式

在这里插入图片描述

[root@base1 mnt]# egrep  -v "nologin$"  passwd   #-v表示反向过滤

在这里插入图片描述

[root@base1 mnt]# vim file
  1 root sbin root
  2 sbin root sbin
  3 root root
  4 root sbin sbin
  5 root
  6 sbin
  7 
  8 
  9 color colorful
 10 colorful

在这里插入图片描述

[root@base1 mn]# egrep  -v "^root | root$"  file | grep root   #过滤root在中间的行
[root@base1 mn]#grep -i root file | grep -v -i -E "^root | root$"   #过滤root在中间的行

在这里插入图片描述

[root@base1 mnt]# egrep -vn '.' file    # 过滤空行
[root@base1 mnt]# egrep  -n '^$' file  # 过滤空行

在这里插入图片描述

[root@base1 mnt]# egrep 'o+' file   #  输出包括o,oo,ooo,....,即o至少出现一次

在这里插入图片描述

[root@base1 mnt]# egrep ‘color(ful)?’ file # 末尾的ful最多出现一次，也可以没有

在这里插入图片描述

[root@base1 mnt]# egrep '[a-z]' file

在这里插入图片描述

[root@base1 mnt]# vim file1
  1 root
  2 rot sbinsbin
  3 rooot sbina sbinb
  4 roooot sbinsbinsbin

在这里插入图片描述

[root@base1 mnt]# grep 'r.t' file1    # 过滤r和t之间只有一个字符
[root@base1 mnt]# grep 'r..t' file1   # 过滤r和t之间有二个字符
[root@base1 mnt]# grep 'r...t' file1    # 过滤r和t之间有三个字符
[root@base1 mnt]# grep 'r*t' file1    # 过滤r出现的任意次数
[root@base1 mnt]# grep  'ro*t' file1  # 过滤r和t之间o出现的任意次数

在这里插入图片描述

[root@base1 mnt]# grep  'r\**t' file1  # 过滤r,t之间有0个字符
[root@base1 mnt]# egrep   'ro?t' file1   # 过滤r和t之间o出现的0和1次
[root@base1 mnt]#  egrep  'ro{1,}t'  file1 # 过滤r和t之间o出现的1以上的次数
[root@base1 mnt]# egrep  'ro{,3}t' file1   # 过滤r和t之间o出现的3以下的次数
[root@base1 mnt]# egrep    'r....' file1   # 过滤r之后出现4个字符

在这里插入图片描述

[root@base1 mnt]#  egrep    'r....\>' file1   #精确匹配
[root@base1 mnt]# egrep    "\<r....\>" file1
[root@base1 mnt]# egrep    "....r"  file1
[root@base1 mnt]# egrep    "\<....r"  file1   #过滤r之前出现4个字符

在这里插入图片描述

2.sed

  stream editor,行编辑器，用来操作纯 ASCII 码的文本，一次处理一行，处理时，把当前行
存储在临时缓冲区，处理完之后，输送到屏幕，接着处理下一行 , 这样不断重复 , 直到文件末尾
sed 符合模式条件的处理 不符合条件的不予处理

注：sed并不会改变文件内容，只是在显示的时候显示出我们想要显示的内容，若要想改变文件内容，可以用参数-i实现

（1）p模式（显示）

[root@base1 mnt]# cp /etc/fstab   /mnt
[root@base1 mnt]# sed -n '/^UUID/p' fstab  # 显示fstab中以UUID开头的行

在这里插入图片描述

[root@base1 mnt]# sed -n '/\:/p'   fstab   # 显示含有冒号的行

在这里插入图片描述

[root@base1 mnt]# sed -n '/^#/p' fstab   # 以#开头的行

在这里插入图片描述

[root@base1 mnt]# sed -n '/^#/!p' fstab    # 不以#开头的行

在这里插入图片描述

[root@base1 mnt]# sed -n '2,6p'           #  fstab显示2～6行

在这里插入图片描述

[root@base1 mnt]# sed -n '2,6!p' fstab     # 不显示2～6行

在这里插入图片描述

[root@base1 mnt]# sed -n '2p;6p' fstab     # 显示第二行和第六行

在这里插入图片描述

（2）d模式(删除，只是显示的时候删除，并不删除原文件)

[root@base1 mnt]# sed '/^UUID/d'  fstab   # 删除以UUID开头的行

在这里插入图片描述

[root@base1 mnt]# sed '/^#/d'   fstab     # 删除以#开头的行

在这里插入图片描述

[root@base1 mnt]# sed '/^$/d'   fstab     # 删除以空格开头的行

在这里插入图片描述

[root@base1 mnt]# sed  '/^UUID/!d'  fstab     # 删除不是以UUID开头的行

在这里插入图片描述

[root@base1 mnt]# cat -n fstab | sed '1,4d'      # 删除1到4行

在这里插入图片描述

[root@base1 mnt]# cat -n fstab | sed -e '2d;6d'   # 删除第二行和第六行

在这里插入图片描述

（3）a模式(添加，默认在最后一行)
[root@base1 mnt]# echo hello > file
[root@base1 mnt]# cat file
hello
[root@base1 mnt]# sed '/hello/aworld' file  # 在hello后添加world，默认换行，且添加在最后
hello
world

在这里插入图片描述

（4）i模式(插入)

[root@base1 mnt]# sed '/hello/i\world'   file  # 在hello前加world
world
hello

在这里插入图片描述

（5）c模式（替换）

[root@base1 mnt]# sed '/^UUID/c\hello' fstab  #把以UUID开头的行替换成hello
[root@base1 mnt]# sed 's/\//#/g' /etc/fstab  # 把fatab文件里的/全文替换成#

在这里插入图片描述

（6）w模式（写入）

[root@base1 mnt]# sed '/^UUID/='  fstab   # 显示以UUID开头所在的行号

在这里插入图片描述

[root@base1 mnt]# sed -n '/bash$/p' passwd > file  # 把passwd中以bash结尾的行输入到file中
[root@base1 mnt]# cat file

在这里插入图片描述

[root@base1 mnt]# sed -n '/bash$/wfile1' passwd   # 把passwd中以bash结尾的行输入到file1中

区别：

sed -n '/bash$/p' passwd > file #先处理前面的，之后再执行"> file"
sed -n '/bash$/wfile1' passwd   #边处理边执行

[root@base1 mnt]# cat file
hello
world
[root@base1 mnt]# cat file1
say
talk
speak

在这里插入图片描述

[root@base1 mnt]# sed '/hello/=;wfile1' file # 把file中的hello写入到file1中，并显示行号

在这里插入图片描述

[root@base1 mnt]# sed '=' file  # 显示file的行号和内容

在这里插入图片描述

[root@base1 mnt]# sed '3r file' file1   # 添加file中的内容在file1的第3行后

在这里插入图片描述

练习：安装apache,并把apache的默认端口改为8080

[root@base1 mnt]# vim apache_install.sh 
  1 #!/bin/bash
  2 yum install -y httpd    &> /dev/null
  3 echo "httpd is installed..."
  4 sed -i "/^Listen/cListen $1" /etc/httpd/conf/httpd.conf
  5 systemctl restart httpd
  6 echo "the  port is changed successfully，now the port is $1"

在这里插入图片描述

[root@base1 mnt]# sh apache_install.sh

在这里插入图片描述

[root@base1 mnt]# netstat -antlp | grep httpd

在这里插入图片描述

3.awk # 报告生成器

 awk处理机制：awk会逐行处理文本，支持在处理第一行之前做一些准备工作，以及在处理完最后一行做一些总结性质的工作，在命令格式上分别体现如下：
 BEGIN{}   # 读入第一行文本之前执行，一般用来初始化操作
   -F       # 指定分隔符,默认以空格为分隔符
  {}       # 逐行处理，逐行读入文本执行相应的处理，是最常见的编辑指令块
 END      # 处理完最后一行文本之后执行，一般用来输出处理结果

（1）基本用法

[root@base1 mnt]# awk '{print $0}' file   # $0表示输出全部
this is a file
that is a directory
[root@base1 mnt]# awk '{print $1}' file  # $1表示输出第一列
this
that
[root@base1 mnt]# awk '{print $2}' file  # $2表示输出第二列
is
is
[root@base1 mnt]# awk '{print $3}' file  # $3表示输出第三列
a
a
[root@base1 mnt]# awk '{print $4}' file  # $4表示输出第四列
file
directory

在这里插入图片描述

（2）常用变量

[root@base1 mnt]# awk '{print FILENAME,NR}' passwd  #输出文件名和当前操作的行号
[root@base1 mnt]# awk -F: '{print NR,NF}' passwd   # 输出每次处理的行号以及当前以 : 为分隔符的字段个数
[root@base1 mnt]# awk  'BEGIN{a=34;print a+12}'   # a从34开始，直到a+12结束，打印a的值

在这里插入图片描述

[root@base1 mnt]#awk -F ":" 'BEGIN{print "NAME"}{print NR;print  }END{print "END"}' file   # 以:为分隔符，文件开头加NAME，文件末尾加END，并输出行号和内容

在这里插入图片描述

[root@base1 mnt]# awk -F ":" 'BEGIN{N=0}/bash$/{N++}END{print N}'  passwd   # 统计passwd中以bash结尾的行数
7
[root@base1 mnt]# awk  '/bash$/ ' passwd   # 输出passwd中以bash结尾的行

在这里插入图片描述

[root@base1 mnt]# awk  -F ":" '/bash$/{print $1}'  passwd  # 以：为分隔符，打印以bash结尾的行的第一列

在这里插入图片描述

[root@base1 mnt]# awk  -F : '/^ro/{print}'  passwd  # 打印passwd中以ro开头的行

在这里插入图片描述

[root@base1 mnt]# awk  -F : '/^[a-d]/{print $1,$6}' passwd  # 打印passwd中，以a到d开头的行的第一列和第六列

在这里插入图片描述

[root@base1 mnt]# awk 'NR==3 {print}' passwd   # 打印第三行
daemon:x:2:2:daemon:/sbin:/sbin/nologin
[root@base1 mnt]# awk 'NR%2==0  {print}' passwd   # 打印偶数行
[root@base1 mnt]# awk 'NR%2!=0  {print}' passwd     # 打印奇数行
[root@base1 mnt]# awk 'NR <= 3 {print }'   passwd    # 打印前三行
root:x:0:0:root:/root:/bin/bash
bin:x:1:1:bin:/bin:/sbin/nologin
daemon:x:2:2:daemon:/sbin:/sbin/nologin
[root@base1 mnt]# cat file

在这里插入图片描述

[root@base1 mnt]# awk 'BEGIN{i=0}{i+=NF}END{print i}'  file  # 统计文本总字段个数
8

练习1:找出系统中UID小于3的用户

[root@base1 mnt]# awk -F: '$3 > 0 && $3 < 3  {print}'  /etc/passwd
bin:x:1:1:bin:/bin:/sbin/nologin
daemon:x:2:2:daemon:/sbin:/sbin/nologin

练习2：统计文件中的字段数

[root@base1 mnt]# vim test
  1 hello world
  2 say hi
  3 say goodbye
[root@base1 mnt]# awk 'BEGIN{i=0}{i+=NF} END{print i}' test
6

(3)循环语句

If单分支语句：
1.统计登录shell为bash的用户有多少个

	[root@base1 mnt]# awk -F: 'BEGIN{i=0}{if($7~/bash$/){i++}}END{print i}' /etc/passwd
	7

2.打印登录shell为bash的用户

	[root@base1 mnt]# awk -F: '{if($7~/bash$/){print $1}}'   passwd

在这里插入图片描述

3.统计UID小于500的用户有多少个

[root@base1 mnt]# awk -F: 'BEGIN{i=0}{if($3 < 500){i++}}END{print i} ' /etc/passwd
34

（2）If双分支语句

1.统计UID小于等于500和大于500的用户个数

[root@base1 mnt]# awk -F: 'BEGIN{i=0;j=0}{if($3<=500){i++}else{j++}}END{print i,j}' /etc/passwd
34 18

（3）awk的for循环

[root@base1 mnt]# awk 'BEGIN{for(i=1;i<=5;i++){print i}}'

在这里插入图片描述