Java file scanning and searching for strings in multiple files

foreword

If you want to scan which files are in a directory (including subdirectories); or you want to search the contents of all files in a folder, which lines of which files contain the keyword "xxx", then this article can help your turn.

1. Tools



import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.*;

public class SearchWord {
    
    

   //当在文件夹中搜索时,遇到以下非文本文件则跳过
    public static List<String> excludeFileType = Arrays.asList(
            "jar", "zip", "rar", "7z", "tar", "gz", "xz", "bz2", "doc", "class", "pak",
            "xls", "ppt", "pdf", "docx", "xlsx", "pptx", "jpg", "jpge", "gif", "png",
            "xltd", "war", "hprof", "m4a", "swf", "mobi", "jpeg", "tiff", "svg", "psd",
            "mp3", "aac", "mp4", "avi", "flv", "mkv", "mpeg", "msi", "tgz", "mdf",
            "rmvb", "apk", "ts", "map", "car", "mov", "wav", "raw", "dll", "woff",
            "eot", "otf", "ico", "ttf", "ttc", "fon", "dl_", "pd_", "ex_", "etl",
            "sys", "iso", "isz", "esd", "wim", "gho", "dmg", "mpf", "exe", "ldf");

    /**
     * 搜索指定文件中的关键字
     *
     * @param filePath  要搜索的文件路径
     * @param searchStr 要搜索的关键字
     * @return 返回的 map<行数, 该行内容>
     */
    public static Map<Integer, String> scanFile(String filePath, String searchStr) {
    
    
        Map<Integer, String> map = new LinkedHashMap<>();
        FileInputStream file = null; //读取文件为字节流
        try {
    
    
            file = new FileInputStream(filePath);
            InputStreamReader in = new InputStreamReader(file, StandardCharsets.UTF_8); //字节流转化为字符流,以GBK读取防止中文乱码
            BufferedReader buf = new BufferedReader(in); //加入到缓存区
            String str = "";
            int row = 1;
            while ((str = buf.readLine()) != null) {
    
     //按行读取,到达最后一行返回null
                if (str.contains(searchStr)) {
    
    
                    map.put(row, str);
                }
                row++;
            }
            buf.close();
            file.close();
        } catch (FileNotFoundException e) {
    
    
            e.printStackTrace();
        } catch (IOException e) {
    
    
            e.printStackTrace();
        }
        return map;
    }

    /**
     * 扫描dirPath下所有文件
     *
     * @param dirPath 要搜索的文件夹路径
     * @return 返回所有文件的路径
     */
    public static List<String> getAllFilesPath(String dirPath) {
    
    
        List<String> list = new ArrayList<>();
        return getAll_FilesPath(dirPath, list);
    }

    /**
     * 扫描dirPath下所有文件
     *
     * @param dirPath    要搜索的文件夹路径
     * @param excludeDir 不扫描的文件夹名列表
     * @return 返回所有文件的路径
     */
    public static List<String> getAllFilesPathEx(String dirPath, List<String> excludeDir) {
    
    
        List<String> list = new ArrayList<>();
        return getAll_FilesPath(dirPath, list, excludeDir);
    }

    /**
     * 扫描dirPath下的所有文件类型是fileType的文件
     *
     * @param dirPath  要搜索的文件夹路径
     * @param fileType 文件后缀,要扫描的文件的类型
     * @return 返回所有fileType类型文件的路径
     */
    public static List<String> getAllFilesPath(String dirPath, List<String> fileType) {
    
    
        List<String> list = new ArrayList<>();
        return getAllFiles(dirPath, fileType, list);
    }

    /**
     * 扫描dirPath下的所有文件类型是fileType的文件
     *
     * @param dirPath    要搜索的文件夹路径
     * @param fileType   文件后缀,要扫描的文件的类型
     * @param excludeDir 不扫描的文件夹名列表
     * @return 返回所有fileType类型文件的路径
     */
    public static List<String> getAllFilesPath(String dirPath, List<String> fileType, List<String> excludeDir) {
    
    
        List<String> list = new ArrayList<>();
        return getAllFiles(dirPath, fileType, list, excludeDir);
    }

    /**
     * @param dirPath   要搜索的文件夹路径
     * @param searchStr 要搜索的关键字
     * @param fileType  要搜索的文件后缀
     * @return <文件名, <行数, 该行内容>>
     */
    public static Map<String, Map<Integer, String>> searchFiles(String dirPath, String searchStr, List<String> fileType) {
    
    
        return searchFiles(dirPath, searchStr, fileType, null);
    }

    /**
     * @param dirPath    要搜索的文件夹路径
     * @param searchStr  要搜索的关键字
     * @param fileType   要搜索的文件后缀
     * @param excludeDir 不扫描的文件夹名列表
     * @return <文件名, <行数, 该行内容>>
     */
    public static Map<String, Map<Integer, String>> searchFiles(String dirPath, String searchStr, List<String> fileType, List<String> excludeDir) {
    
    
        List<String> allFiles = excludeDir == null || excludeDir.size() == 0 ? getAllFilesPath(dirPath, fileType) : getAllFilesPath(dirPath, fileType, excludeDir);
        Map<String, Map<Integer, String>> searchInfo = new LinkedHashMap<>();
        for (String f : allFiles) {
    
    
            System.out.println("正在文件中搜索,当前搜索文件:" + f);
            Map<Integer, String> map = scanFile(f, searchStr);
            if (map.size() != 0) {
    
    
                searchInfo.put(f, map);
            }
        }
        return searchInfo;
    }


    /**
     * 搜索文件夹下所有可读文件中是否含有要查找的关键字
     *
     * @param dirPath   要搜索的文件夹路径
     * @param searchStr 要搜索的关键字
     * @return <文件名, <行数, 该行内容>>
     */
    public static Map<String, Map<Integer, String>> searchAllFiles(String dirPath, String searchStr) {
    
    
        return searchAllFiles(dirPath, searchStr, null);
    }

    /**
     * 搜索文件夹下所有可读文件中是否含有要查找的关键字
     *
     * @param dirPath    要搜索的文件夹路径
     * @param searchStr  要搜索的关键字
     * @param excludeDir 不扫描的文件夹名列表
     * @return <文件名, <行数, 该行内容>>
     */
    public static Map<String, Map<Integer, String>> searchAllFiles(String dirPath, String searchStr, List<String> excludeDir) {
    
    
        List<String> allFiles = excludeDir == null || excludeDir.size() == 0 ? getAllReadFilessPath(dirPath, new ArrayList<>(), null) : getAllReadFilessPath(dirPath, new ArrayList<>(), excludeDir);
        Map<String, Map<Integer, String>> searchInfo = new LinkedHashMap<>();
        for (String f : allFiles) {
    
    
            System.out.println("正在文件中搜索,当前搜索文件:" + f);
            Map<Integer, String> map = scanFile(f, searchStr);
            if (map.size() != 0) {
    
    
                searchInfo.put(f, map);
            }
        }
        return searchInfo;
    }

    /**
     * @param dirPath   要搜索的文件夹路径
     * @param searchStr 要搜索的关键字
     * @param fileType  要搜索的文件后缀
     * @return <文件名, <行数, 该行内容>>
     */
    public static void searchAndPrint(String dirPath, String searchStr, List<String> fileType) {
    
    
        Map<String, Map<Integer, String>> map = SearchWord.searchFiles(dirPath, searchStr, fileType);
        for (Map.Entry<String, Map<Integer, String>> m : map.entrySet()) {
    
    
            System.out.println("文件路径: " + m.getKey());
            for (Map.Entry<Integer, String> n : m.getValue().entrySet()) {
    
    
                System.out.println("第" + n.getKey() + "行:" + n.getValue());
            }
            System.out.println();
        }
    }

    //获取所有的文件路径,excludeDir是要跳过查询的文件夹名列表
    public static List<String> getAllReadFilessPath(String dirPath, List<String> list, List<String> excludeDir) {
    
    
        File file = new File(dirPath);
        File[] tempList = file.listFiles();
        System.out.println("正在扫描文件夹:" + dirPath);
        if (null != tempList) {
    
    
            for (int i = 0; i < tempList.length; i++) {
    
    
                String filePath = tempList[i].toString();
                String file_Type = filePath.substring(filePath.lastIndexOf(".") + 1);
                if (tempList[i].isFile()) {
    
    
                    //如果是可读的文本文件
                    if (!excludeFileType.contains(file_Type)) {
    
    
                        list.add(filePath);
                    }
                } else {
    
    
                    if (excludeDir == null || excludeDir.size() == 0) {
    
    
                        getAllReadFilessPath(filePath, list, excludeDir);
                    } else {
    
    
                        String dirName = filePath.substring(filePath.lastIndexOf(File.separator) + 1);
                        if (!excludeDir.contains(dirName)) {
    
    
                            //如果是文件夹则递归
                            getAllReadFilessPath(filePath, list, excludeDir);
                        }
                    }
                }
            }
        }
        return list;
    }

    //获取文件夹下所有的文件路径,excludeDir是要跳过查询的文件夹名列表
    public static List<String> getAll_FilesPath(String dirPath, List<String> list, List<String> excludeDir) {
    
    
        File file = new File(dirPath);
        File[] tempList = file.listFiles();
        System.out.println("正在扫描文件夹:" + dirPath);
        if (null != tempList) {
    
    
            for (int i = 0; i < tempList.length; i++) {
    
    
                String filePath = tempList[i].toString();
                if (tempList[i].isFile()) {
    
    
                    list.add(filePath);
                } else {
    
    
                    //如果是文件夹则递归
                    if (excludeDir == null || excludeDir.size() == 0) {
    
    
                        getAll_FilesPath(filePath, list);
                    } else {
    
    
                        String dirName = filePath.substring(filePath.lastIndexOf(File.separator) + 1);
                        if (!excludeDir.contains(dirName)) {
    
    
                            //如果是文件夹则递归
                            getAll_FilesPath(filePath, list, excludeDir);
                        }
                    }
                }
            }
        }
        return list;
    }

    //获取文件夹下所有的文件路径
    public static List<String> getAll_FilesPath(String path, List<String> list) {
    
    
        return getAll_FilesPath(path, list, null);
    }

    //获取所有的文件路径,fileType是所有要查询的文件类型
    public static List<String> getAllFiles(String dirPath, List<String> fileType, List<String> list) {
    
    
        return getAllFiles(dirPath, fileType, list, null);
    }

    //获取所有的文件路径,fileType是所有要查询的文件类型,excludeDir是要跳过查询的文件夹名列表
    public static List<String> getAllFiles(String dirPath, List<String> fileType, List<String> list, List<String> excludeDir) {
    
    
        File file = new File(dirPath);
        File[] tempList = file.listFiles();
        System.out.println("正在扫描文件夹:" + dirPath);
        if (null != tempList) {
    
    
            for (int i = 0; i < tempList.length; i++) {
    
    
                String filePath = tempList[i].toString();
                String file_Type = filePath.substring(filePath.lastIndexOf(".") + 1);
                if (tempList[i].isFile()) {
    
    
                    //如果是文件
                    if (fileType.contains(file_Type)) {
    
    
                        list.add(filePath);
                    }
                } else {
    
    
                    if (excludeDir == null || excludeDir.size() == 0) {
    
    
                        getAllFiles(filePath, fileType, list);
                    } else {
    
    
                        String dirName = filePath.substring(filePath.lastIndexOf(File.separator) + 1);
                        if (!excludeDir.contains(dirName)) {
    
    
                            //如果是文件夹则递归
                            getAllFiles(filePath, fileType, list, excludeDir);
                        }
                    }
                }
            }
        }
        return list;
    }
}



2. Scan all files in the folder

​ Scan all files in D:\Xunlei Download\folder

    public static void main(String[] args) {
    
    
        String dir = "D:\\迅雷下载\\";
        List<String> filesPath = SearchWord.getAllFilesPath(dir);
        filesPath.forEach(System.out::println);
    }

output:

D:\迅雷下载\1623720138.rar
D:\迅雷下载\codeNotes-master.zip
D:\迅雷下载\DittoSetup_64bit_3_24_184_0.exe
D:\迅雷下载\eclipse-inst-jre-win64.exe
D:\迅雷下载\eclipse-jee-2021-09-R-win32-x86_64.zip
D:\迅雷下载\fastjson-1.2.76.jar
D:\迅雷下载\guava-30.1.1-jre.jar
D:\迅雷下载\ideaIU-213.5605.12.exe
D:\迅雷下载\imageglass_8.2.6.6_x64.msi
D:\迅雷下载\latest-win64.zip.xltd
D:\迅雷下载\latest-win64.zip.xltd.cfg
D:\迅雷下载\layDate-v5.3.1\laydate\laydate.js
D:\迅雷下载\layDate-v5.3.1\laydate\theme\default\font\iconfont.woff
D:\迅雷下载\layDate-v5.3.1\laydate\theme\default\laydate.css
D:\迅雷下载\layDate-v5.3.1\test.html
D:\迅雷下载\layDate-v5.3.1\文档\文档.url
D:\迅雷下载\layDate-v5.3.1\更新日志.url
D:\迅雷下载\layDate-v5.3.1.zip
D:\迅雷下载\springboot-demo-maven-master.zip
D:\迅雷下载\yx12345sqlyog.rar
D:\迅雷下载\新建文件夹\ideaIU-2021.1.1.exe
D:\迅雷下载\新建文件夹\note.txt

3. Scan all files under the folder, exclude the specified folder

Scan all files under D:\Xunlei Download\ folder, do not scan files under "layDate-v5.3.1" folder

    public static void main(String[] args) {
    
    
        String dir = "D:\\迅雷下载\\";
        List<String> excludeDir = Arrays.asList("layDate-v5.3.1");
        List<String> filesPath = SearchWord.getAllFilesPathEx(dir,excludeDir);
        filesPath.forEach(System.out::println);
    }

output:

D:\迅雷下载\1623720138.rar
D:\迅雷下载\codeNotes-master.zip
D:\迅雷下载\DittoSetup_64bit_3_24_184_0.exe
D:\迅雷下载\eclipse-inst-jre-win64.exe
D:\迅雷下载\eclipse-jee-2021-09-R-win32-x86_64.zip
D:\迅雷下载\fastjson-1.2.76.jar
D:\迅雷下载\guava-30.1.1-jre.jar
D:\迅雷下载\ideaIU-213.5605.12.exe
D:\迅雷下载\imageglass_8.2.6.6_x64.msi
D:\迅雷下载\latest-win64.zip.xltd
D:\迅雷下载\latest-win64.zip.xltd.cfg
D:\迅雷下载\layDate-v5.3.1.zip
D:\迅雷下载\springboot-demo-maven-master.zip
D:\迅雷下载\yx12345sqlyog.rar
D:\迅雷下载\新建文件夹\ideaIU-2021.1.1.exe
D:\迅雷下载\新建文件夹\note.txt

4. Scan the specified type of files in the folder

​ Scan D:\Xunlei Download\folder for jar, zip, txt files

    public static void main(String[] args) {
    
            
        String dir = "D:\\迅雷下载\\";
        List<String> filesType = Arrays.asList("jar","zip","txt");
        List<String> filesPath = SearchWord.getAllFilesPath(dir,filesType);
        filesPath.forEach(System.out::println);
     }

output:

D:\迅雷下载\codeNotes-master.zip
D:\迅雷下载\eclipse-jee-2021-09-R-win32-x86_64.zip
D:\迅雷下载\fastjson-1.2.76.jar
D:\迅雷下载\guava-30.1.1-jre.jar
D:\迅雷下载\layDate-v5.3.1.zip
D:\迅雷下载\springboot-demo-maven-master.zip
D:\迅雷下载\新建文件夹\note.txt

5. Scan the specified type of files under the folder and exclude the specified folder

Scan the D:\Xunlei Download\ folder for jar, zip, and txt files. If there is a folder named "New Folder", the file under this folder will not be scanned

    public static void main(String[] args) {
    
            
        String dir = "D:\\迅雷下载\\";
        List<String> filesType = Arrays.asList("jar","zip","txt");
        List<String> excludeDir = Arrays.asList("新建文件夹");
        List<String> filesPath = SearchWord.getAllFilesPath(dir,filesType,excludeDir);
        filesPath.forEach(System.out::println);
     }

output:

D:\迅雷下载\codeNotes-master.zip
D:\迅雷下载\eclipse-jee-2021-09-R-win32-x86_64.zip
D:\迅雷下载\fastjson-1.2.76.jar
D:\迅雷下载\guava-30.1.1-jre.jar
D:\迅雷下载\layDate-v5.3.1.zip
D:\迅雷下载\springboot-demo-maven-master.zip

6. Search for a string in the specified file

​ Search for the insertData keyword in the server.log file

    public static void main(String[] args) {
    
            
        String filePath = "D:\\迅雷下载\\server.log";
        String searchWord = "insertData";
        Map<Integer, String> map = SearchWord.scanFile(filePath,searchWord);
        for(Map.Entry<Integer, String> entry : map.entrySet()){
    
    
            System.out.println("第"+entry.getKey()+"行:"+entry.getValue());
        }
     }

output:

67:[http-nio-8080-exec-339  2021 Dec 27 15:10:42]insertData-开始插入数据库list大小:368:[http-nio-8080-exec-339  2021 Dec 27 15:10:42]insertData-nowadays:2021-12-27 15:10:4269:[http-nio-8080-exec-339  2021 Dec 27 15:10:42]insertData-handleResult结束
第70:[http-nio-8080-exec-339  2021 Dec 27 15:10:42]insertData-firm大小:15

7. Search for strings in files of a specified type under a folder

Find the keyword insertData in all "txt", "log" type files under D:\Xunlei Download\

    public static void main(String[] args) {
    
       
	    String dir = "D:\\迅雷下载\\";
        String searchStr = "insertData";
        List<String> fileType = Arrays.asList("txt", "log");
        Map<String, Map<Integer, String>> map = SearchWord.searchFiles(dir, searchStr, fileType);
        for (Map.Entry<String, Map<Integer, String>> m : map.entrySet()) {
    
    
            System.out.println("文件路径: " + m.getKey());
            for (Map.Entry<Integer, String> n : m.getValue().entrySet()) {
    
    
                System.out.println("第" + n.getKey() + "行:" + n.getValue());
            }
            System.out.println();
        }
    }

or

    public static void main(String[] args) {
    
       
		SearchWord.searchAndPrint(dir,searchStr,fileType);
    }

output:

文件路径: D:\迅雷下载\server.log
第67:[http-nio-8080-exec-339  2021 Dec 27 15:10:42]insertData-开始插入数据库list大小:368:[http-nio-8080-exec-339  2021 Dec 27 15:10:42]insertData-nowadays:2021-12-27 15:10:4269:[http-nio-8080-exec-339  2021 Dec 27 15:10:42]insertData-handleResult结束
第70:[http-nio-8080-exec-339  2021 Dec 27 15:10:42]insertData-firm大小:15
    
文件路径: D:\迅雷下载\新建文件夹\note.txt
第15行:insertData-插入数据库
第17行:insertData-fserviceid:21-1419行:insertData-firmpv:120行:insertData-firmuv:121行:insertData-userpv:122行:insertData-useruv:1

8. Search for strings in files of a specified type under a folder, excluding specified folders

Search for the keyword insertData in all "txt", "log" type files under the D:\Xunlei Download\ folder, if the folder name is "New Folder", exclude the search for this folder

    public static void main(String[] args) {
    
       
	    String dir = "D:\\迅雷下载\\";
        String searchStr = "insertData";
        List<String> fileType = Arrays.asList("txt", "log");
        List<String> excludeDir = Arrays.asList("新建文件夹"); //遇到文件夹名为"新建文件夹"则跳过该文件夹
        Map<String, Map<Integer, String>> map = SearchWord.searchFiles(dir, searchStr, fileType, excludeDir);
        for (Map.Entry<String, Map<Integer, String>> m : map.entrySet()) {
    
    
            System.out.println("文件路径: " + m.getKey());
            for (Map.Entry<Integer, String> n : m.getValue().entrySet()) {
    
    
                System.out.println("第" + n.getKey() + "行:" + n.getValue());
            }
            System.out.println();
        }
    }

output:

文件路径: D:\迅雷下载\server.log
第67:[http-nio-8080-exec-339  2021 Dec 27 15:10:42]insertData-开始插入数据库list大小:368:[http-nio-8080-exec-339  2021 Dec 27 15:10:42]insertData-nowadays:2021-12-27 15:10:4269:[http-nio-8080-exec-339  2021 Dec 27 15:10:42]insertData-handleResult结束
第70:[http-nio-8080-exec-339  2021 Dec 27 15:10:42]insertData-firm大小:15

9. Search for a string in all readable files in the specified folder

Search for the keyword ssssss in all text files under the D:\Xunlei Download\ folder. By default, non-text files such as music and video will not be searched

    public static void main(String[] args) throws IOException {
    
    
 		String dir = "D:\\迅雷下载\\";
        String searchStr ="ssssss";
        Map<String, Map<Integer, String>> map = SearchWord.searchAllFiles(dir, searchStr);
        System.out.println("");
        System.out.println("--------------------------------------------------------");
        System.out.println("搜索完成,结果:");
        for (Map.Entry<String, Map<Integer, String>> m : map.entrySet()) {
    
    
            System.out.println("文件路径: " + m.getKey());
            for (Map.Entry<Integer, String> n : m.getValue().entrySet()) {
    
    
                System.out.println("第" + n.getKey() + "行:" + n.getValue());
            }
            System.out.println();
        }
    }

output:

正在扫描文件夹:D:\迅雷下载\
正在扫描文件夹:D:\迅雷下载\layDate-v5.3.1
正在扫描文件夹:D:\迅雷下载\layDate-v5.3.1\laydate
正在扫描文件夹:D:\迅雷下载\layDate-v5.3.1\laydate\theme
正在扫描文件夹:D:\迅雷下载\layDate-v5.3.1\laydate\theme\default
正在扫描文件夹:D:\迅雷下载\layDate-v5.3.1\laydate\theme\default\font
正在扫描文件夹:D:\迅雷下载\layDate-v5.3.1\文档
正在扫描文件夹:D:\迅雷下载\test
正在扫描文件夹:D:\迅雷下载\新建文件夹
正在文件中搜索,当前搜索文件:D:\迅雷下载\aaa
正在文件中搜索,当前搜索文件:D:\迅雷下载\layDate-v5.3.1\laydate\theme\default\font\iconfont.eot
正在文件中搜索,当前搜索文件:D:\迅雷下载\layDate-v5.3.1\laydate\theme\default\font\iconfont.woff
正在文件中搜索,当前搜索文件:D:\迅雷下载\layDate-v5.3.1\laydate\theme\default\laydate.css
正在文件中搜索,当前搜索文件:D:\迅雷下载\layDate-v5.3.1\test.html
正在文件中搜索,当前搜索文件:D:\迅雷下载\layDate-v5.3.1\文档\官网.url


--------------------------------------------------------
搜索完成,结果:
文件路径: D:\迅雷下载\aaa
第1行:ssssss

10. Search for strings in all readable files under the specified folder, excluding the specified folder

Check the specified folder under the D:\Xunlei Download\ folder, and then search for the keyword ssssss in all text files. By default, non-text files such as music and video will not be searched

    public static void main(String[] args) throws IOException {
    
    
        String dir = "D:\\迅雷下载\\";
        String searchStr ="ssssss";
        List<String> excludeDir = Arrays.asList("laydate"); //遇到文件夹名为laydate"则跳过该文件夹
        Map<String, Map<Integer, String>> map = SearchWord.searchAllFiles(dir, searchStr,excludeDir);
        System.out.println("");
        System.out.println("--------------------------------------------------------");
        System.out.println("搜索完成,结果:");
        for (Map.Entry<String, Map<Integer, String>> m : map.entrySet()) {
    
    
            System.out.println("文件路径: " + m.getKey());
            for (Map.Entry<Integer, String> n : m.getValue().entrySet()) {
    
    
                System.out.println("第" + n.getKey() + "行:" + n.getValue());
            }
            System.out.println();
        }
    }

output:

正在扫描文件夹:D:\迅雷下载\
正在扫描文件夹:D:\迅雷下载\layDate-v5.3.1
正在扫描文件夹:D:\迅雷下载\layDate-v5.3.1\文档
正在扫描文件夹:D:\迅雷下载\test
正在扫描文件夹:D:\迅雷下载\新建文件夹
正在文件中搜索,当前搜索文件:D:\迅雷下载\aaa
正在文件中搜索,当前搜索文件:D:\迅雷下载\layDate-v5.3.1\laydate\theme\default\font\iconfont.eot
正在文件中搜索,当前搜索文件:D:\迅雷下载\layDate-v5.3.1\laydate\theme\default\font\iconfont.woff
正在文件中搜索,当前搜索文件:D:\迅雷下载\layDate-v5.3.1\laydate\theme\default\laydate.css
正在文件中搜索,当前搜索文件:D:\迅雷下载\layDate-v5.3.1\test.html
正在文件中搜索,当前搜索文件:D:\迅雷下载\layDate-v5.3.1\文档\官网.url


--------------------------------------------------------
搜索完成,结果:
文件路径: D:\迅雷下载\aaa
第1行:ssssss

11. Use it as a jar package

    public static void main(String[] args) throws IOException {
    
    
        List<String> excludeDir = Arrays.asList("laydate"); //遇到文件夹名为"新建文件夹"则跳过该文件夹
        String dir = ""; //要扫描的文件夹
        String searchStr = ""; //要查找的字符串
        String[] array = null; //不查找的文件夹名
        for (int i = 0; i < args.length; i++) {
    
    
            if (i == 0) {
    
    
                dir = args[i];
            }else if (i == 1) {
    
    
                searchStr = args[i];
            }else if (i == 2) {
    
    
                String tempDirs = args[i];
                array = tempDirs.split(",");
            }
            System.out.println("参数" + (i + 1) + "的值为:" + args[i]);
        }
        Map<String, Map<Integer, String>> map = array == null ? SearchWord.searchAllFiles(dir, searchStr) : SearchWord.searchAllFiles(dir, searchStr, new ArrayList<>(Arrays.asList(array)));
        System.out.println("");
        System.out.println("--------------------------------------------------------");
        System.out.println("搜索完成,结果:");
        for (Map.Entry<String, Map<Integer, String>> m : map.entrySet()) {
    
    
            System.out.println("文件路径: " + m.getKey());
            for (Map.Entry<Integer, String> n : m.getValue().entrySet()) {
    
    
                System.out.println("第" + n.getKey() + "行:" + n.getValue());
            }
            System.out.println();
        }
    }

After it is packaged into a jar package, it can be used on the server. Assuming it is packaged as scan.jar, the command is as follows:

java -jar scan.jar param1 param2 param3

Among them, param1 is the folder to be scanned, param2 is the string to be searched, and param3 is the name of the folder not to be searched separated by commas

For example:
search for files under the D:\myFiles\ folder and search for the location containing the string love, and skip the folder if it encounters a folder named move or music

java -jar scan.jar D:\myFiles\ love move,music

12. Multi-threaded search

Consider using multithreading to make a simple multi-file search string program:
Suppose there are three parameters folderPath, searchStr, containsSubFolder, which respectively represent the folder path, the string to be searched, and whether to search subdirectories.
If containsSubFolder is false, only search for which lines of all readable files under the folderPath folder (not including its subdirectories) contain the key string of searchStr, and output the content of this line; if containsSubFolder is true, search for the folderPath folder containing
its Which lines contain the searchStr key string in all readable files in the subdirectory, and output the content of this line.

Points to consider:
1. Use 2 threads:
- Thread A scans the path of all readable files in the folder and puts them in the queue;
- Thread B continuously fetches file path data from the queue and then searches in the file;
2. When does thread B end:
- After thread A scans, it should return a flag to thread B to tell it that thread A has ended;
- When thread B knows that thread A has ended, thread B needs to determine whether there is any data in the queue , if not, it means that all the files have been searched, and the search will end;
3. Consider the communication between threads, that is, shared variables
4. Consider what queue to use to store the data given by A thread and provide data to B thread
5. Consider The result is output only after both threads A and B are executed.

The following code will use a fixed-length thread pool to create two threads, then use the LinkedBlockingQueue unbounded blocking queue to receive data from thread A and provide data to thread B, use AtomicBoolean to share variables between threads, and use CountDownLatch to wait for the two threads to finish executing before outputting result.

The tool class SearchUtils is as follows:

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.atomic.AtomicBoolean;

public class SearchUtils {
    
    

    //当在文件夹中搜索时,遇到以下常见的非文本文件则跳过
    public static List<String> excludeFileType = Arrays.asList(
            "jar", "zip", "rar", "7z", "tar", "gz", "xz", "bz2", "doc", "class", "pak",
            "xls", "ppt", "pdf", "docx", "xlsx", "pptx", "jpg", "jpge", "gif", "png",
            "xltd", "war", "hprof", "m4a", "swf", "mobi", "jpeg", "tiff", "svg", "psd",
            "mp3", "aac", "mp4", "avi", "flv", "mkv", "mkv", "mpeg", "msi", "tgz",
            "rmvb", "apk", "ts", "map", "car", "mov", "wav", "raw", "dll", "woff",
            "eot", "otf", "ico", "ttf", "ttc", "fon", "dl_", "pd_", "ex_", "etl",
            "sys", "iso", "isz", "esd", "wim", "gho", "dmg", "mpf", "exe", "ldf", "mdf");

    
	/**
     * 获取 folderPath 文件夹下的所有的可读文件的路径,并将 路径存入 allFilesPath 里
     *
     * @param folderPath  要搜索的文件夹路径
     * @param allFilesPath 可读文件的路径存放进allFilesPath
     * @param containsSubFolder 是否搜索子文件夹
     */
    public static void getAllReadFilessPath(String folderPath, BlockingQueue<String> allFilesPath, boolean containsSubFolder) {
    
    
        File file = new File(folderPath);
        File[] tempList = file.listFiles();
        //System.out.println("正在扫描文件夹:" + dirPath);
        if (null == tempList) {
    
    
            return;
        }
        for (int i = 0; i < tempList.length; i++) {
    
    
            String filePath = tempList[i].toString();
            String file_Type = filePath.substring(filePath.lastIndexOf(".") + 1).toLowerCase();;
            if (tempList[i].isFile()) {
    
    
                //如果是文件并且是可读的文本文件
                if (!excludeFileType.contains(file_Type)) {
    
    
                    try {
    
    
                        allFilesPath.put(filePath);
                    } catch (InterruptedException e) {
    
    
                        e.printStackTrace();
                    }
                }
            } else {
    
    
                //如果是文件夹且要读取子文件夹
                if (containsSubFolder) {
    
    
                    getAllReadFilessPath(filePath, allFilesPath, containsSubFolder);
                }
            }
        }
    }

    
	/**
     * 不断从 allFilesPath 里取文件路径然后搜索文件中是否含有关键字,把结果存到 searchResult 里
     *
     * @param allFilesPath  用于从该队列中获取文件路径
     * @param searchStr 要搜索的关键字
     * @param searchResult 存放最终的结果
     * @param scanFinish 要搜索的关键字
     */
    public static void searchAllFiles(BlockingQueue<String> allFilesPath, String searchStr, Map<String, Map<Integer, String>> searchResult, AtomicBoolean scanFinish) {
    
    
        while (true) {
    
    
            String filePath = null;
            try {
    
    
                filePath = allFilesPath.poll(1, TimeUnit.SECONDS);
            } catch (InterruptedException e) {
    
    
                e.printStackTrace();
            }
            //在 filePath 文件中搜索 searchStr 关键字
            Map<Integer, String> map = scanFile(filePath, searchStr);
            if (map.size() != 0) {
    
    
                searchResult.put(filePath, map);
            }
            if (allFilesPath.size() == 0 && scanFinish.get()) {
    
    
                //若队列中的数据已经取完并且扫描线程也已经扫描完就跳出循环
                break;
            }
        }
    }

    /**
     * 搜索指定文件中的关键字
     *
     * @param filePath  要搜索的文件路径
     * @param searchStr 要搜索的关键字
     * @return 返回的 map<行数, 该行内容>
     */
    public static Map<Integer, String> scanFile(String filePath, String searchStr) {
    
    
        Map<Integer, String> map = new LinkedHashMap<>();
        if (filePath == null) {
    
    
            return map;
        }
        FileInputStream file = null; //读取文件为字节流
        try {
    
    
            file = new FileInputStream(filePath);
            InputStreamReader in = new InputStreamReader(file, StandardCharsets.UTF_8); //字节流转化为字符流,以UTF-8读取防止中文乱码
            BufferedReader buf = new BufferedReader(in); //加入到缓存区
            String str = "";
            int row = 1;
            while ((str = buf.readLine()) != null) {
    
     //按行读取,到达最后一行返回null
                if (str.contains(searchStr)) {
    
    
                    map.put(row, str);
                }
                row++;
            }
            buf.close();
            file.close();
        } catch (FileNotFoundException e) {
    
    
            e.printStackTrace();
        } catch (IOException e) {
    
    
            e.printStackTrace();
        }
        return map;
    }
}



Multithreaded code:

   public static void main(String[] args) throws Exception {
    
    
		//搜索 F 盘及其子文件夹下所有可读文件中那些文件包含 "g789d" 关键字
        Map<String, Map<Integer, String>> searchResult = search("F:\\", "g789d", true);
        StringBuffer result = new StringBuffer();
        for (Map.Entry<String, Map<Integer, String>> m : searchResult.entrySet()) {
    
    
            result.append("  文件: " + m.getKey());
            result.append("\n");
            for (Map.Entry<Integer, String> n : m.getValue().entrySet()) {
    
    
                result.append("\t第 " + n.getKey() + " 行:" + n.getValue() + "\n");
            }
            result.append("\n\n ");
        }
        System.out.println(result.toString());

    }

    public static Map<String, Map<Integer, String>> search( String folderPath, String searchStr, boolean containsSubFolder){
    
    
        //存储所有的扫描的可读文件的路径
        BlockingQueue<String> allFilesPath = new LinkedBlockingQueue<>();
        //用于判断扫描线程是否已扫描完成
        AtomicBoolean scanFinish = new AtomicBoolean(false);
        //存储所有搜索的结果<文件名,<行数,内容>>
        Map<String, Map<Integer, String>> searchResult = new LinkedHashMap<>();
        //线程计数器
        CountDownLatch countDownLatch = new CountDownLatch(2);

        //创建一个定长线程池,初始化为2
        ExecutorService fixedThreadPool = Executors.newFixedThreadPool(2);

        //创建一个线程用于扫描所有的可读文件路径,然后存放到 allFilesPath 里,扫描完将 scanFinish 设为 true
        fixedThreadPool.execute(() -> {
    
    
            SearchUtils.getAllReadFilessPath(folderPath, allFilesPath, containsSubFolder);
            scanFinish.set(true);//当此线程扫描完所有线程后将scanFinish设置为true
            countDownLatch.countDown();//线程计数器减一
        });

        //创建一个线程不断从 allFilesPath 里取文件路径然后搜索文件中是否含有关键字,把结果存到 searchResult 里,
        // 当 allFilesPath 为空并且 scanFinish 为true (即上面的线程扫描完了,此线程也把 allFilesPath 里的所有的文件扫描完了),就跳出searchAllFiles方法
        fixedThreadPool.execute(() -> {
    
    
            SearchUtils.searchAllFiles(allFilesPath, searchStr, searchResult, scanFinish);
            countDownLatch.countDown();
        });

        try {
    
    
            //等待上面两个线程执行完
            countDownLatch.await();
        } catch (InterruptedException e) {
    
    
            e.printStackTrace();
        }
        //关闭线程池
        fixedThreadPool.shutdown();
        return searchResult;
    }

result:

  文件: F:\asposeHtml\aa_files\we.txt
	第 4 行:fhfg789dfrh

   文件: F:\fileTypeTest\havaAttachFile\we.txt
	第 4 行:fhfg789dfrh

   文件: F:\spireHtml\myExcel_files\we.txt
	第 4 行:fhfg789dfrh

   文件: F:\we.txt
	第 4 行:fhfg789dfrh
	第 8 行:fhfg789dfrh
	第 12 行:fhfg789dfrh

   文件: F:\ydz\test\新建文件夹\we.txt
	第 4 行:fhfg789dfrh

This example is just a simple producer and consumer problem, and there is only one producer and consumer. If there are multiple producers and consumers, you can refer to:

Producer Consumer Solution using BlockingQueue in Java Thread
Producer-Consumer Problem With Example in Java

Guess you like

Origin blog.csdn.net/qq_33697094/article/details/122237127