爬虫遇到路径转换的解决方案

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/dreamzuora/article/details/84326651
				// Ask jsoup to resolve the link first via the "abs:" attribute prefix.
				// This sometimes fails, in which case the value comes back empty.
				String href = n.attr("abs:href");
				if ("".equals(href)) {
					// Fall back to the raw attribute value and resolve it ourselves.
					href = n.attr("href");
					// Only a leading http:// or https:// marks the link as already absolute.
					// A plain "contains http" test would wrongly skip relative links such as
					// "/redirect?to=http://example.com".
					boolean alreadyAbsolute = href.regionMatches(true, 0, "http://", 0, 7)
							|| href.regionMatches(true, 0, "https://", 0, 8);
					if (!alreadyAbsolute) {
						// NOTE(review): getAbsoluteURL returns null when resolution fails,
						// so href may be null after this point.
						href = getAbsoluteURL(url, href);
					}
				}
				

	/**
	 * Resolves a (possibly relative) link against the URL of the page it was found on.
	 *
	 * <p>Leading and trailing whitespace is stripped from both arguments before
	 * resolving, since such whitespace would otherwise make the URI unparseable.
	 *
	 * @param baseURI      URL of the page that contains the link
	 * @param relativePath the link to resolve; may be relative or already absolute
	 * @return the absolute URL as a string, or {@code null} when either argument is
	 *         {@code null}, cannot be parsed as a URI, or does not resolve to an
	 *         absolute URL with a known protocol
	 */
	public static String getAbsoluteURL(String baseURI, String relativePath) {
		if (baseURI == null || relativePath == null) {
			return null;
		}
		try {
			// URI of the page the link was found on.
			URI base = new URI(baseURI.trim());
			// Resolve the link against that page to obtain an absolute URI.
			URI resolved = base.resolve(relativePath.trim());
			// toURL() rejects non-absolute URIs and unknown protocols.
			return resolved.toURL().toString();
		} catch (URISyntaxException | MalformedURLException | IllegalArgumentException e) {
			// URI.resolve(String) and URI.toURL() report bad input with
			// IllegalArgumentException, so it is caught explicitly here instead of
			// being swallowed by a return inside a finally block.
			System.err.println("Cannot resolve '" + relativePath + "' against '" + baseURI + "': " + e);
			return null;
		}
	}

猜你喜欢

转载自blog.csdn.net/dreamzuora/article/details/84326651