F12|Java 导出B站分p标题到excel

JS:在浏览器 F12 控制台中执行

1.直接输出

// Print the title of every part ("分P") of a multi-part Bilibili video to the
// console, one title per paragraph. Meant to be pasted into the F12 console.
//
// The top-level bindings stay `var` so the snippet can be pasted and re-run in
// the same console session without "already declared" errors.
var result = '';

// Entered from "Watch later": the parts are `.multip-list-item` nodes inside `.multip-list`.
// e.g. https://www.bilibili.com/list/watchlater?bvid=BV1Kr4y1i7ru&oid=765670802
var content = document.getElementsByClassName('multip-list')[0];
if (content) {
  content = content.querySelectorAll('.multip-list-item');
} else {
  // Entered from favorites or search: the parts are <li> nodes inside `.list-box`.
  // e.g. https://www.bilibili.com/video/BV1Kr4y1i7ru
  const listBox = document.getElementsByClassName('list-box')[0];
  if (listBox) {
    content = listBox.querySelectorAll('li');
  } else {
    // Neither container exists: report it instead of throwing a TypeError.
    console.warn('No part list found on this page (.multip-list / .list-box)');
    content = [];
  }
}

for (let i = 0; i < content.length; i++) {
  // innerText splits into either
  //   ["P184", "32. 运维-分库分表-Mycat管理工具", "09:53"]  or
  //   ["32. 运维-分库分表-Mycat管理工具", "09:53"]
  // so the title is at index 1 when a "P…" label is present, otherwise at index 0.
  const parts = content[i].innerText.split('\n');
  const titleIndex = parts.length > 2 ? 1 : 0;

  // Option 1 (title and duration): use parts.join('\t') instead of the line below.
  // Option 2 (title only): the replace() strips all whitespace and may be omitted.
  result += parts[titleIndex].replace(/\s+/g, '');
  // Leave an empty line between titles.
  result += '\n\n';
}

// NOTE(review): this also turns commas that are part of a title into tabs;
// kept exactly as in the original snippet.
console.log(result.replace(/,/g, '\t'));

  • 在控制台输出区域点击右键,选择“另存为…”,即可将 console 的全部内容保存到文件
    请添加图片描述

2.加强版:导出excel

// Read the part list (title and duration of every part) of a multi-part
// Bilibili video from the page and render it as an HTML document containing
// one table. The result is left in `html`; `title` is the page's <h1> title.
//
// The top-level bindings stay `var` so the snippet can be pasted and re-run in
// the same console session without "already declared" errors.
var listBox = document.getElementsByClassName("list-box")[0];
var liList = listBox ? listBox.getElementsByTagName("li") : [];
var title = (document.getElementsByTagName("h1")[0] || { title: "" }).title;
var musicList = [];
// Declared explicitly: in the original the preceding `;` ended the `var`
// statement, which made this an implicit global.
var musicTimeList = [];

// Escape text before interpolating it into markup, so titles containing
// `<`, `>`, `&` or `"` cannot break the generated table.
var escapeHtml = function escapeHtml(text) {
	return String(text)
		.replace(/&/g, "&amp;")
		.replace(/</g, "&lt;")
		.replace(/>/g, "&gt;")
		.replace(/"/g, "&quot;");
};

if (!listBox) {
	console.warn("No .list-box element found; the exported table will be empty");
}

[].forEach.call(liList, (item) => {
	const link = item.getElementsByTagName("a")[0];
	const duration = item.getElementsByClassName("duration")[0];
	// Whitespace is stripped from the title; a missing node yields an empty cell.
	musicList.push(link ? link.title.replace(/\s+/g, "") : "");
	musicTimeList.push(duration ? duration.innerText : "");
});

// Width used when the table is shown in a web page (matters little for the
// saved file): the longest title length times the font size. 0 for an empty list.
var maxWidth = musicList.reduce((widest, name) => Math.max(widest, name.length), 0);
var fontSize = 20;

var tempDom = `<table id="music" border="1" cellspacing="0" cellpadding="20" width='${maxWidth * fontSize}px'>
	<caption style="font-size: ${fontSize}px; font-weight: bold;">${escapeHtml(title)}</caption>
	<thead style="background-color: darkorange;">
		<tr align="center">
			<th>序号</th>
			<th>分P名称</th>
			<th>时长</th>
		</tr>
	</thead>
	<tbody>`;
musicList.forEach((name, index) => {
	tempDom += `<tr align="center">
		<td>${index + 1}</td>
		<td align="left">${escapeHtml(name)}</td>
		<td>${escapeHtml(musicTimeList[index])}</td>
	</tr>`;
});
tempDom += `</tbody>
</table>`;

// To preview the table in the current page instead:
// document.body.innerHTML = tempDom;

var html = `<!DOCTYPE html>
<html lang="en">
	<head>
		<meta charset="UTF-8">
	</head>
	<body>
		${tempDom}
	</body>
</html>`;

/**
 * Offer `content` to the user as a file download with an Excel MIME type.
 *
 * The file name is the global `title` when it is non-empty, otherwise
 * "bilibili分P目录".
 *
 * @param {string} content - Text to save (here: the generated HTML document).
 */
var downloadText = function downloadText(content) {
	const blob = new Blob([content], { type: "application/vnd.ms-excel" });
	const url = window.URL.createObjectURL(blob);
	const a = document.createElement("a");
	a.download = title || "bilibili分P目录";
	a.href = url;
	// Attach the link while clicking it: a detached anchor is not guaranteed to
	// trigger a download in every browser.
	a.style.display = "none";
	document.body.appendChild(a);
	a.click();
	document.body.removeChild(a);
	// Release the object URL on a later task rather than synchronously, so the
	// browser has a chance to start reading the blob first.
	setTimeout(() => window.URL.revokeObjectURL(url), 0);
};
downloadText(html);

Java

1.主要代码

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.alibaba.fastjson.parser.Feature;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.util.*;
import java.util.stream.Collectors;

/**
 * Exports the part ("分P") list of a Bilibili video page to an Excel file.
 * <p>
 * The page is fetched with jsoup, the part list is read from the JSON that an inline script
 * assigns to {@code window.__INITIAL_STATE__}, and the rows are handed to
 * {@code JsonToExcel.jsonToExcel}.
 * <p>
 * Background reading on parsing HTML from Java:
 * https://blog.csdn.net/qq_37284798/article/details/125410786
 * https://blog.csdn.net/jun123355/article/details/126459050
 */
public class BilibiliUtil {

    /** Name of the global that the page's inline script assigns the state JSON to. */
    private static final String STATE_MARKER = "window.__INITIAL_STATE__";

    /**
     * Downloads the video page, extracts {@code videoData.pages} and writes one row per part
     * (number, title, duration) to {@code D:\export_tmp\<video title>.xlsx}.
     *
     * @param args unused
     * @throws Exception if the page cannot be fetched or parsed, or the file cannot be written
     */
    public static void main(String[] args) throws Exception {
        // Fetch the page as a jsoup document.
        Document doc = Jsoup.connect("https://www.bilibili.com/video/BV1Kr4y1i7ru/").get();

        // Video title; an absent <h1> yields an empty title instead of a NullPointerException.
        Element heading = doc.getElementsByTag("h1").first();
        String title = heading == null ? "" : heading.attr("title");

        // The part list cannot be read from the "list-box" <li> elements of the fetched HTML:
        // per the original author's note the data is delivered through a <script> instead,
        // so the inline script that assigns window.__INITIAL_STATE__ is parsed below.
        // https://blog.csdn.net/qq_23114831/article/details/122434465
        // https://www.shuzhiduo.com/A/kjdwK7AdNp/
        for (Element script : doc.select("script")) {
            String data = script.data();
            if (data == null || !data.startsWith(STATE_MARKER)) {
                continue;
            }

            // Feature.OrderedField keeps the fields in their original order.
            // Relevant part of the state:
            //   "videoData": { "title": "...", "pages": [
            //       {"cid": 772184762, "page": 1, "part": "01.MySQL课程介绍", "duration": 552, ...},
            //       {"cid": 486995038, "page": 2, "part": "02. 基础-课程内容&数据库相关概念", "duration": 535, ...}
            //   ]}
            JSONObject state = JSONObject.parseObject(extractStateJson(data), Feature.OrderedField);
            JSONObject videoData = state == null ? null : state.getJSONObject("videoData");
            JSONArray pages = videoData == null ? null : videoData.getJSONArray("pages");
            if (pages == null) {
                throw new IllegalStateException("videoData.pages not found in " + STATE_MARKER);
            }

            JSONObject workbook = new JSONObject();
            workbook.put("sheet", toRows(pages));
            // Generic "JSON to Excel" helper.
            JsonToExcel.jsonToExcel("D:\\export_tmp\\" + toFileName(title) + ".xlsx", workbook);
            return;
        }

        System.err.println("No " + STATE_MARKER + " script found; nothing was exported");
    }

    /**
     * Cuts the JSON text out of the inline state script.
     *
     * @param script script text starting with {@code window.__INITIAL_STATE__}
     * @return the text after the first {@code =}, without trailing JavaScript
     */
    private static String extractStateJson(String script) {
        int assign = script.indexOf('=', STATE_MARKER.length());
        if (assign < 0) {
            throw new IllegalStateException(STATE_MARKER + " is not followed by an assignment");
        }
        int start = assign + 1;
        // NOTE(review): the page is assumed to append ";(function(){...}())" after the JSON;
        // when that suffix is absent the text is used up to its end.
        int end = script.lastIndexOf(";(function");
        if (end < start) {
            end = script.length();
        }
        String json = script.substring(start, end).trim();
        return json.endsWith(";") ? json.substring(0, json.length() - 1) : json;
    }

    /**
     * Maps every entry of {@code videoData.pages} to a row with the columns
     * 集数 (part number), 标题 (title without whitespace) and 时长 (formatted duration).
     */
    private static JSONArray toRows(JSONArray pages) {
        return pages.stream().map(o -> {
            JSONObject page = (JSONObject) o;
            // LinkedHashMap keeps the column order.
            Map<String, Object> row = new LinkedHashMap<>();
            row.put("集数", page.get("page"));
            String part = page.getString("part");
            // Remove all whitespace from the title.
            // https://www.cnblogs.com/tunqing/p/15571419.html
            row.put("标题", part == null ? "" : part.replaceAll("\\s+", ""));
            row.put("时长", secondConvertHourMinSecond(page.getLongValue("duration")));
            return row;
        }).collect(Collectors.toCollection(JSONArray::new));
    }

    /**
     * Turns a video title into a usable Windows file name: characters that are not allowed in
     * file names are replaced by {@code _}; a blank title becomes {@code bilibili}.
     */
    private static String toFileName(String title) {
        String name = title == null ? "" : title.replaceAll("[\\\\/:*?\"<>|]", "_").trim();
        return name.isEmpty() ? "bilibili" : name;
    }

    /**
     * Formats a number of seconds as {@code mm:ss}, or as {@code hh:mm:ss} once it reaches one hour
     * (e.g. {@code 01:01:01}). Negative values are formatted from their magnitude with a leading
     * {@code -}.
     * https://blog.51cto.com/u_11269274/5252232
     * https://www.ab62.cn/article/18304.html
     *
     * @param second number of seconds to format
     * @return the formatted duration
     */
    public static String secondConvertHourMinSecond(long second) {
        // Computed in long arithmetic; the original narrowed to int and could overflow.
        String sign = second < 0 ? "-" : "";
        long hh = Math.abs(second / 3600);
        long mm = Math.abs(second % 3600 / 60);
        long ss = Math.abs(second % 60);
        return hh == 0
                ? String.format("%s%02d:%02d", sign, mm, ss)
                : String.format("%s%02d:%02d:%02d", sign, hh, mm, ss);
    }

}

2.pom

  <!-- HTML parsing for Java -->
  <dependency>
       <groupId>org.jsoup</groupId>
       <artifactId>jsoup</artifactId>
       <version>1.14.1</version>
   </dependency>

  <dependency>
      <groupId>com.alibaba</groupId>
      <artifactId>fastjson</artifactId>
      <version>2.0.7</version>
  </dependency>

  <!--        Declaring poi-ooxml also brings in poi, so poi itself stays commented out -->
  <!--        <dependency>-->
  <!--            <groupId>org.apache.poi</groupId>-->
  <!--            <artifactId>poi</artifactId>-->
  <!--            <version>4.1.0</version>-->
  <!--        </dependency>-->
  <dependency>
      <groupId>org.apache.poi</groupId>
      <artifactId>poi-ooxml</artifactId>
      <version>4.1.0</version>
  </dependency>

3.通用json转excel工具

import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.alibaba.fastjson.parser.Feature;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.LinkedHashSet;
import java.util.Set;


/**
 * Converts JSON to an Excel workbook; every top-level array becomes its own sheet.
 * https://blog.csdn.net/hanne_lovegood/article/details/124351031
 */
public class JsonToExcel {

    /**
     * Reads {@code D:\export_tmp\test4.txt}, parses it as JSON and writes
     * {@code D:\export_tmp\test4.xlsx}. A top-level array is wrapped as the single entry "sheet".
     *
     * @param args unused
     * @throws Exception if the workbook cannot be written
     */
    public static void main(String[] args) throws Exception {
        // Location of the JSON file.
        String jsonFilePath = "D:\\export_tmp\\test4.txt";
        // Name and path of the generated file.
        String fileName = "D:\\export_tmp\\test4.xlsx";

        // The input must exist; stop here instead of failing later with a misleading message.
        File file = new File(jsonFilePath);
        if (!file.exists()) {
            System.out.println("json文件不存在");
            return;
        }
        String jsonString = readFileContent(file);
        if (jsonString == null || jsonString.trim().length() == 0) {
            System.out.println("文件内容为null");
            return;
        }

        // The text may be a JSON object or a JSON array.
        // https://blog.csdn.net/weixin_45353083/article/details/109749473
        // Feature.OrderedField keeps the fields in their original order.
        Object parsed = JSONObject.parse(jsonString, Feature.OrderedField);
        JSONObject jsonObject;
        if (parsed instanceof JSONObject) {
            jsonObject = (JSONObject) parsed;
        } else if (parsed instanceof JSONArray) {
            jsonObject = new JSONObject();
            jsonObject.put("sheet", parsed);
        } else {
            System.out.println("文件格式错误");
            return;
        }

        jsonToExcel(fileName, jsonObject);

        System.out.println("生成文件成功:" + fileName);
    }

    /**
     * Writes every top-level JSON array of {@code jsonObject} to its own sheet.
     * <p>
     * Non-array values and empty arrays are skipped; only the first level is processed, nested
     * JSON is not expanded. The first row of a sheet holds the keys, each following row one
     * array element. The workbook format follows the file name: {@code .xlsx} is written as an
     * OOXML workbook, anything else in the binary {@code .xls} format.
     *
     * @param fileName   absolute path of the file to write
     * @param jsonObject object whose array-valued entries become sheets
     * @throws Exception if the target directory or the file cannot be written
     */
    public static void jsonToExcel(String fileName, JSONObject jsonObject) throws Exception {
        // The original always wrote the binary HSSF (.xls) format, even into "*.xlsx" files.
        boolean ooxml = fileName.regionMatches(true, fileName.length() - 5, ".xlsx", 0, 5);

        File target = new File(fileName);
        File parent = target.getAbsoluteFile().getParentFile();
        if (parent != null && !parent.exists() && !parent.mkdirs()) {
            throw new IOException("Cannot create directory: " + parent);
        }

        // try-with-resources closes the workbook and the stream even when writing fails.
        try (Workbook wb = ooxml ? new XSSFWorkbook() : new HSSFWorkbook()) {
            Set<String> arrayKeys = jsonObject.keySet();
            for (String arrayKey : arrayKeys) {
                if (!(jsonObject.get(arrayKey) instanceof JSONArray)) {
                    continue;
                }
                JSONArray jsonArray = jsonObject.getJSONArray(arrayKey);
                if (jsonArray == null || jsonArray.isEmpty()) {
                    continue;
                }
                // With a single top-level key the sheet is simply called "sheet".
                writeSheet(wb.createSheet(arrayKeys.size() > 1 ? arrayKey : "sheet"), jsonArray);
            }
            // Make sure the workbook never goes out with zero sheets.
            if (wb.getNumberOfSheets() == 0) {
                wb.createSheet("sheet");
            }
            try (FileOutputStream output = new FileOutputStream(target)) {
                wb.write(output);
            }
        }
    }

    /**
     * Fills one sheet: a header row with the keys of all elements (in first-seen order), then
     * one row per element with each value placed under its own key.
     */
    private static void writeSheet(Sheet sheet, JSONArray jsonArray) {
        // Collect the columns from every element so rows with differing keys stay aligned.
        Set<String> columns = new LinkedHashSet<>();
        for (int i = 0; i < jsonArray.size(); i++) {
            columns.addAll(jsonArray.getJSONObject(i).keySet());
        }

        Row header = sheet.createRow(0);
        int columnNo = 0;
        for (String column : columns) {
            Cell cell = header.createCell(columnNo++);
            cell.setCellValue(column);
        }

        for (int i = 0; i < jsonArray.size(); i++) {
            JSONObject item = jsonArray.getJSONObject(i);
            Row row = sheet.createRow(i + 1);
            columnNo = 0;
            for (String column : columns) {
                Cell cell = row.createCell(columnNo++);
                cell.setCellValue(item.getString(column));
            }
        }
    }

    /**
     * Reads a UTF-8 text file and returns its lines concatenated without line separators.
     * An I/O error is printed and whatever was read up to that point is returned.
     *
     * @param file file to read
     * @return the concatenated lines, possibly empty
     */
    public static String readFileContent(File file) {
        StringBuilder content = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                content.append(line);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return content.toString();
    }
}

扩展:B站进入页面如何让它不要自动播放

  • 取消勾选请添加图片描述

扩展:B站分集为什么不连播

  • 播放设置–更多播放设置–播放设置–自动切换
    请添加图片描述

猜你喜欢

转载自blog.csdn.net/weixin_44174211/article/details/129411502
f12