#!/bin/bash
#
# Usage: this.sh [dir]
# Result: [dir]/result/filelist (files which contain keys)
# Default dir: /home
#
# Must be run as root. Uses unzip to inspect office documents and pdftotext
# to inspect PDF files.

# Sensitive keywords, joined with '|' so that the string can be used directly
# as an extended-regex alternation. Note that one keyword ("CPU IP核")
# contains a space.
keys="型谱|新品|秘密|机密|绝密|涉密|计算机模块研制|处理器适配研改|处理机系统研制|CPU IP核|内场实验|外场实验|嵌入64位CPU|国产化基础软硬件平台|虚拟化|装备承制|许可证|武器装备|承制资格|资格证书"
# Space-separated extensions of the office documents whose content is scanned.
types="doc docx xls xlsx ppt pptx"
# Directory to search; filled in from the command line further down.
FindDIR=""
# check user: the scan must run as root (UID 0). Abort with a non-zero status
# otherwise, so that callers can tell the scan did not run.
if [ "$UID" -ne 0 ]; then
  echo "Please run $0 with root user!" >&2
  exit 1
fi
# find dir: take the directory to scan from the first argument, defaulting to
# /home when no argument is given. The expansion is quoted so that an empty
# argument or a path containing whitespace is validated properly instead of
# slipping past (or breaking) the -d test.
if [ $# -eq 0 ]; then
  FindDIR=/home
else
  FindDIR=$1
  if [ ! -d "$FindDIR" ]; then
    echo "$FindDIR is not a director!" >&2
    exit 1
  fi
fi
# Start from an empty result directory inside the scanned tree. The path is
# quoted so that a directory name containing whitespace cannot make rm -rf
# act on unrelated paths.
ResultDIR="${FindDIR}/result"
rm -rf -- "$ResultDIR" 2> /dev/null
mkdir -p -- "$ResultDIR"
# Find file name contain "keys": record every path under FindDIR whose name
# contains one of the keywords.
# The keyword string is split on '|' only. Splitting on whitespace as well
# would break a keyword that contains a space (e.g. "CPU IP核") into
# separate, much broader patterns.
IFS='|' read -r -a name_keys <<< "$keys"
for key in "${name_keys[@]}"; do
  # An empty keyword would turn the pattern into "**" and match everything.
  [ -n "$key" ] || continue
  find "$FindDIR" -name "*${key}*" >> "${ResultDIR}/filelist"
done
# file content contain "keys"
#egrep "(`echo $keys`)" -l -r $FindDIR >> ${ResultDIR}/filelist
# find document(office) file contain "keys"
# Each candidate is unpacked with unzip into a scratch directory and the
# extracted files are searched for the keywords. Files that unzip cannot open
# leave no scratch directory behind and are therefore skipped.
# NOTE(review): the legacy .doc/.xls/.ppt formats are presumably not ZIP
# archives, in which case only the .docx/.xlsx/.pptx files are really
# inspected -- confirm.
#
# Candidate paths are stored NUL-terminated and passed around quoted, so names
# containing spaces, tabs, '|' or glob characters reach unzip intact.
if ! command -v unzip > /dev/null 2>&1; then
  echo "Warning: unzip not found; office documents will not be scanned." >&2
fi
# Word splitting is intentional here: types is a space-separated list.
for t in $types; do
  find "$FindDIR" -name "*.${t}" -print0 >> "${ResultDIR}/document.list"
done
# The list is read on descriptor 3 so that unzip can never consume it through
# its standard input.
while IFS= read -r -d '' file <&3; do
  unzip "$file" -d "${ResultDIR}/tmp" > /dev/null 2>&1
  if grep -E -q -r -- "(${keys})" "${ResultDIR}/tmp" 2> /dev/null; then
    printf '%s\n' "$file" >> "${ResultDIR}/filelist"
  fi
  rm -rf -- "${ResultDIR}/tmp" 2> /dev/null
done 3< "${ResultDIR}/document.list"
# PDF files: convert each one to text with pdftotext and search that text for
# the keywords.
#
# Candidate paths are stored NUL-terminated and passed around quoted, so names
# containing spaces, tabs, '|' or glob characters reach pdftotext intact.
if ! command -v pdftotext > /dev/null 2>&1; then
  echo "Warning: pdftotext not found; PDF files will not be scanned." >&2
fi
find "$FindDIR" -name "*.pdf" -print0 >> "${ResultDIR}/pdf.list"
# The list is read on descriptor 3 so the commands in the loop body cannot
# consume it through their standard input.
while IFS= read -r -d '' file <&3; do
  pdftotext "$file" "${ResultDIR}/tmp.txt" 2> /dev/null
  if grep -E -q -- "(${keys})" "${ResultDIR}/tmp.txt" 2> /dev/null; then
    printf '%s\n' "$file" >> "${ResultDIR}/filelist"
  fi
  rm -f -- "${ResultDIR}/tmp.txt" 2> /dev/null
done 3< "${ResultDIR}/pdf.list"
# clean tmp file: drop the intermediate candidate lists. Paths are quoted so a
# result directory whose name contains whitespace is handled correctly.
rm -f -- "${ResultDIR}/document.list"
rm -f -- "${ResultDIR}/pdf.list"
# Result: turn any '|' placeholders in the list back into spaces, then tell
# the user where the list is.
sed -i 's/|/ /g' "${ResultDIR}/filelist"
echo "Result: ${ResultDIR}/filelist!"
# ---- Second script: search Word documents for the keywords ----
#!/bin/bash
#
# Usage: this.sh [dir]
# Result: [dir]/result/filelist (files which contain keys)
# Default dir: /home

# Sensitive keywords, joined with '|' so that the string can be used directly
# as an extended-regex alternation. The list must be a properly closed string
# and must not end in '|': a trailing '|' adds an empty alternative, which
# matches every line and would flag every document.
keys="型谱|新品|秘密|机密|绝密|涉密|计算机模块研制|处理器适配研改|处理机系统研制|CPU IP核|内场实验|外场实验|嵌入64位CPU|国产化基础软硬件平台|虚拟化|装备承制|许可证|武器装备|承制资格|资格证书"
# Space-separated extensions of the Word documents whose content is scanned.
types="doc docx"
# Directory to search; filled in from the command line further down.
FindDIR=""
# check user: the scan must run as root (UID 0). Abort with a non-zero status
# otherwise, so that callers can tell the scan did not run.
if [ "$UID" -ne 0 ]; then
  echo "Please run $0 with root user!" >&2
  exit 1
fi
# find dir: take the directory to scan from the first argument, defaulting to
# /home when no argument is given. The expansion is quoted so that an empty
# argument or a path containing whitespace is validated properly instead of
# slipping past (or breaking) the -d test.
if [ $# -eq 0 ]; then
  FindDIR=/home
else
  FindDIR=$1
  if [ ! -d "$FindDIR" ]; then
    echo "$FindDIR is not a director!" >&2
    exit 1
  fi
fi
# Start from an empty result directory inside the scanned tree. The path is
# quoted so that a directory name containing whitespace cannot make rm -rf
# act on unrelated paths.
ResultDIR="${FindDIR}/result"
rm -rf -- "$ResultDIR" 2> /dev/null
mkdir -p -- "$ResultDIR"
# antiword is used further down to turn Word documents into text. Install it
# only when it is missing, so that a normal run does not invoke the package
# manager, and say so when the installation fails instead of continuing
# silently.
if ! command -v antiword > /dev/null 2>&1; then
  yum install -y antiword \
    || echo "Warning: could not install antiword; Word documents will not be scanned." >&2
fi
# file content contain "keys"
#egrep "(`echo $keys`)" -l -r $FindDIR >> ${ResultDIR}/filelist
# find document(office) file contain "keys"
# Each Word document is converted to text by antiword and that text is
# searched case-insensitively for the keywords.
# NOTE(review): antiword presumably only understands the legacy .doc format,
# in which case .docx files are never actually inspected -- confirm.
#
# Candidate paths are stored NUL-terminated and passed around quoted, so names
# containing spaces, tabs, '|' or glob characters reach antiword intact.
# Word splitting is intentional here: types is a space-separated list.
for t in $types; do
  find "$FindDIR" -name "*.${t}" -print0 >> "${ResultDIR}/document.list"
done
# The list is read on descriptor 3 so the commands in the loop body cannot
# consume it through their standard input.
while IFS= read -r -d '' file <&3; do
  # grep's output is discarded rather than using -q, so it always drains the
  # pipe and antiword is never cut short.
  if antiword "$file" | grep -E -i -- "(${keys})" > /dev/null 2>&1; then
    printf '%s\n' "$file" >> "${ResultDIR}/filelist"
  fi
done 3< "${ResultDIR}/document.list"
# for ppt file: PowerPoint files are not inspected here; every .ppt and .pptx
# file found is added to the result list unconditionally. The search root is
# quoted so that a directory name containing whitespace works.
find "$FindDIR" -name "*.ppt" >> "${ResultDIR}/filelist"
find "$FindDIR" -name "*.pptx" >> "${ResultDIR}/filelist"
# clean tmp file: drop the intermediate candidate list. Paths are quoted so a
# result directory whose name contains whitespace is handled correctly.
rm -f -- "${ResultDIR}/document.list"
# Result: turn any '|' placeholders in the list back into spaces, then tell
# the user where the list is.
sed -i 's/|/ /g' "${ResultDIR}/filelist"
echo "Result: ${ResultDIR}/filelist!"