Download the URLs listed in an XLSX file to a specified directory

  Love is a lamp; friendship is a shadow. When the lamp goes out, you will find that you are surrounded by shadows. Friends are the people who can give you strength in the end.

import org.apache.poi.ss.usermodel.*;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;

import java.io.*;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

/**
 * Scans the first sheet of an XLSX workbook for string cells containing an HTTP(S) URL and
 * downloads every such URL into a local directory using a fixed-size thread pool.
 *
 * <p>Each file is stored under the download directory at the same relative path it has in the
 * URL (for example {@code https://host/a/b/c.jpg} becomes {@code <dir>/a/b/c.jpg}). Downloads
 * that fail are collected and written to {@code error_table.xlsx} inside the download directory.
 */
public class XLSXDownloader {

    /** Workbook that is read when no path is given on the command line. */
    private static final String DEFAULT_XLSX_PATH = "C:\\Users\\Administrator\\Desktop\\123.xlsx";

    /** Target directory that is used when none is given on the command line. */
    private static final String DEFAULT_DOWNLOAD_DIRECTORY = "F:\\123\\";

    /** Number of worker threads that is used when none is given on the command line. */
    private static final int DEFAULT_MAX_THREADS = 5;

    /** Upper bound for establishing a connection, so that a dead host cannot stall a worker forever. */
    private static final int CONNECT_TIMEOUT_MILLIS = 15_000;

    /** Upper bound for a single blocking read, so that a stalled transfer cannot stall a worker forever. */
    private static final int READ_TIMEOUT_MILLIS = 60_000;

    /**
     * Entry point.
     *
     * @param args optional overrides: {@code args[0]} path of the XLSX file, {@code args[1]}
     *             download directory, {@code args[2]} number of worker threads; every missing
     *             argument falls back to the built-in default
     * @throws NumberFormatException    if {@code args[2]} is present but not an integer
     * @throws IllegalArgumentException if the thread count is not positive
     */
    public static void main(String[] args) {
        String xlsxFilePath = args.length > 0 ? args[0] : DEFAULT_XLSX_PATH;
        String downloadDirectory = args.length > 1 ? args[1] : DEFAULT_DOWNLOAD_DIRECTORY;
        int maxThreads = args.length > 2 ? Integer.parseInt(args[2]) : DEFAULT_MAX_THREADS;

        long startTime = System.currentTimeMillis();

        // Row numbers and URLs of failed downloads. The two lists are kept index-aligned, so both
        // are only ever touched while holding failureLock.
        List<Integer> failedRows = new ArrayList<>();
        List<String> failedUrls = new ArrayList<>();
        Object failureLock = new Object();

        ExecutorService executorService = Executors.newFixedThreadPool(maxThreads);
        try {
            submitDownloads(xlsxFilePath, downloadDirectory, executorService,
                    failedRows, failedUrls, failureLock);
            awaitCompletion(executorService);

            // Copy under the lock: after an interrupted wait some workers may still be running.
            List<Integer> failedRowsSnapshot;
            List<String> failedUrlsSnapshot;
            synchronized (failureLock) {
                failedRowsSnapshot = new ArrayList<>(failedRows);
                failedUrlsSnapshot = new ArrayList<>(failedUrls);
            }

            if (!failedRowsSnapshot.isEmpty()) {
                generateErrorTable(failedRowsSnapshot, failedUrlsSnapshot, downloadDirectory);
            }

            long totalTime = System.currentTimeMillis() - startTime;
            long totalTimeInMinutes = totalTime / (60 * 1000);

            System.out.println("下载完成。总耗时时间:" + totalTimeInMinutes + " 分钟");
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // No-op after a normal run; after an error it stops the (non-daemon) pool threads so
            // the JVM can exit instead of hanging.
            executorService.shutdownNow();
        }
    }

    /**
     * Reads the first sheet of the workbook and submits one download task per string cell that
     * contains {@code http://} or {@code https://}. The workbook is closed before this method
     * returns; the submitted tasks only hold plain strings.
     *
     * @throws IOException if the workbook cannot be opened or read
     */
    private static void submitDownloads(String xlsxFilePath, String downloadDirectory,
                                        ExecutorService executorService,
                                        List<Integer> failedRows, List<String> failedUrls,
                                        Object failureLock) throws IOException {
        try (FileInputStream fileInputStream = new FileInputStream(xlsxFilePath);
             Workbook workbook = new XSSFWorkbook(fileInputStream)) {
            Sheet sheet = workbook.getSheetAt(0); // the data is expected on the first sheet

            // Denominator of the progress message: index of the last row plus one.
            int totalRows = sheet.getLastRowNum() + 1;

            // Iterating the sheet visits only rows that actually exist, so sparse sheets cannot
            // produce a null row.
            for (Row row : sheet) {
                int rowIndex = row.getRowNum();

                for (Cell cell : row) {
                    if (cell.getCellType() != CellType.STRING) {
                        continue;
                    }
                    String url = cell.getStringCellValue().trim();
                    if (!url.contains("http://") && !url.contains("https://")) {
                        continue;
                    }

                    executorService.submit(() -> {
                        try {
                            downloadFile(url, downloadDirectory, rowIndex, totalRows);
                        } catch (IOException | RuntimeException e) {
                            // RuntimeException is caught as well: the Future returned by submit()
                            // is discarded, so anything not handled here would vanish silently.
                            System.err.println("下载失败:" + url + " (" + e + ")");
                            synchronized (failureLock) {
                                failedRows.add(rowIndex);
                                failedUrls.add(url);
                            }
                        }
                    });
                }
            }
        }
    }

    /**
     * Stops accepting new tasks and blocks until every submitted download has finished. If the
     * waiting thread is interrupted, the remaining tasks are cancelled and the interrupt flag is
     * restored.
     */
    private static void awaitCompletion(ExecutorService executorService) {
        executorService.shutdown();
        try {
            executorService.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS);
        } catch (InterruptedException e) {
            executorService.shutdownNow();
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Downloads one URL into the download directory.
     *
     * @param urlString         URL to fetch
     * @param downloadDirectory root directory for downloaded files
     * @param currentRow        zero-based sheet row the URL came from (progress output only)
     * @param totalRows         number of rows in the sheet (progress output only)
     * @throws IOException if the URL is malformed, the server does not answer with HTTP 200, no
     *                     target file can be derived from the URL, or the transfer fails
     */
    private static void downloadFile(String urlString, String downloadDirectory, int currentRow, int totalRows) throws IOException {
        URL url = new URL(urlString);
        HttpURLConnection connection = (HttpURLConnection) url.openConnection();
        connection.setRequestMethod("GET");
        connection.setConnectTimeout(CONNECT_TIMEOUT_MILLIS);
        connection.setReadTimeout(READ_TIMEOUT_MILLIS);

        try {
            int responseCode = connection.getResponseCode();
            if (responseCode != HttpURLConnection.HTTP_OK) {
                throw new IOException("下载失败,HTTP响应码:" + responseCode);
            }

            File targetFile = resolveTargetFile(url, downloadDirectory);
            File parentDirectory = targetFile.getParentFile();
            // mkdirs() returns false when another worker created the directory first, hence the
            // second isDirectory() check.
            if (!parentDirectory.isDirectory() && !parentDirectory.mkdirs() && !parentDirectory.isDirectory()) {
                throw new IOException("无法创建目录:" + parentDirectory);
            }

            System.out.println("线程正在下载总表的第 " + (currentRow + 1) + " 行,总进度 " + (currentRow + 1) + "/" + totalRows);

            try (InputStream inputStream = connection.getInputStream();
                 FileOutputStream outputStream = new FileOutputStream(targetFile)) {
                byte[] buffer = new byte[8192];
                int bytesRead;
                while ((bytesRead = inputStream.read(buffer)) != -1) {
                    outputStream.write(buffer, 0, bytesRead);
                }
            }
        } finally {
            connection.disconnect();
        }
    }

    /**
     * Maps a URL to the local file it is stored in: the URL's path component (without query
     * string or fragment) is appended to the download directory.
     *
     * @throws IOException if the URL path does not end in a file name, or if the resulting file
     *                     would lie outside the download directory (for example via {@code ..})
     */
    private static File resolveTargetFile(URL url, String downloadDirectory) throws IOException {
        String urlPath = url.getPath();
        String relativePath = urlPath.startsWith("/") ? urlPath.substring(1) : urlPath;
        if (relativePath.isEmpty() || relativePath.endsWith("/")) {
            throw new IOException("无法从URL解析文件名:" + url);
        }

        File baseDirectory = new File(downloadDirectory).getCanonicalFile();
        File targetFile = new File(baseDirectory, relativePath).getCanonicalFile();
        if (!targetFile.toPath().startsWith(baseDirectory.toPath())) {
            throw new IOException("下载路径超出下载目录:" + url);
        }
        return targetFile;
    }

    /**
     * Writes {@code error_table.xlsx} into the download directory, one row per failed download:
     * column 0 holds the one-based sheet row, column 1 the URL.
     *
     * @param failedRows        zero-based sheet rows of the failed downloads
     * @param failedUrls        URLs of the failed downloads, index-aligned with {@code failedRows}
     * @param downloadDirectory directory the report is written to; created if missing
     * @throws IOException if the directory cannot be created or the report cannot be written
     */
    private static void generateErrorTable(List<Integer> failedRows, List<String> failedUrls, String downloadDirectory) throws IOException {
        File directory = new File(downloadDirectory);
        // When every download failed, nothing has created the download directory yet.
        if (!directory.isDirectory() && !directory.mkdirs() && !directory.isDirectory()) {
            throw new IOException("无法创建目录:" + directory);
        }
        File errorTableFile = new File(directory, "error_table.xlsx");

        try (Workbook workbook = new XSSFWorkbook()) {
            Sheet sheet = workbook.createSheet("Errors");

            for (int i = 0; i < failedRows.size(); i++) {
                Row newRow = sheet.createRow(i);
                newRow.createCell(0).setCellValue("下载失败的行:" + (failedRows.get(i) + 1));
                newRow.createCell(1).setCellValue("下载失败的URL:" + failedUrls.get(i));
            }

            try (FileOutputStream outputStream = new FileOutputStream(errorTableFile)) {
                workbook.write(outputStream);
            }
        }
    }
}

Guess you like

Origin blog.csdn.net/s_sos0/article/details/132843898