Java + Selenium: crawling weather data from the China Weather site (scheduled crawling)
The code below is a working preliminary version that can be used directly. It fetches the data on a fixed schedule.
package com.test.demo.config;
import org.openqa.selenium.By;
import org.openqa.selenium.Keys;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
import org.springframework.context.annotation.Configuration;
import org.springframework.scheduling.annotation.EnableScheduling;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;
import org.springframework.util.ResourceUtils;
import java.io.*;
import java.net.HttpURLConnection;
import java.net.URL;
import java.text.SimpleDateFormat;
import java.time.LocalDateTime;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
/**
 * Scheduled Spring task that drives a Chrome browser through Selenium to read
 * weather values from {@code http://www.weather.com.cn/} and append them to a
 * text file.
 *
 * <p>Every 60 seconds the task opens the site, types a fixed search keyword into
 * the {@code txtZip} input and submits it. For every browser window other than
 * the one the search was typed into, it reads four values from the
 * {@code weatherChart} element and appends one record to
 * {@code src/main/webapp/data/chinaWeatherDataToday.txt}, resolved relative to
 * the classpath root.
 */
@Component
@Configuration // Marks a configuration class; it also has the effect of @Component.
@EnableScheduling
public class StaticScheduleTaskChinaWeatherData {

    /** Location of the chromedriver binary handed to Selenium. */
    private static final String CHROME_DRIVER_PATH =
            "D:\\Google\\Chrome\\Application\\chromedriver.exe";

    /** Page that hosts the search box. */
    private static final String HOME_URL = "http://www.weather.com.cn/";

    /** Keyword typed into the search box. */
    private static final String SEARCH_KEYWORD = "东关南街";

    /** Output directory, relative to the classpath root. */
    private static final String OUTPUT_DIR = "src/main/webapp/data";

    /** Name of the file the records are appended to. */
    private static final String OUTPUT_FILE_NAME = "chinaWeatherDataToday.txt";

    /** Pause used after typing the keyword and after each tab click, in milliseconds. */
    private static final long PAUSE_MILLIS = 2000L;

    /**
     * Scheduled entry point; runs at a fixed rate of 60 000 ms.
     * A cron expression such as {@code "0/60 * * * * ?"} could be used instead.
     *
     * @throws Exception if the scrape fails for any reason
     */
    @Scheduled(fixedRate = 60000)
    private void configureTasks() throws Exception {
        System.err.println("执行静态定时任务时间: " + LocalDateTime.now());
        spiderWeather();
    }

    /**
     * Runs one scrape: opens the site, submits the search keyword and appends one
     * record per result window to the output file.
     *
     * <p>The writer and the browser session are always released, even when the
     * scrape throws part-way through.
     *
     * @throws Exception if the output file cannot be opened, a page element is
     *                   missing, or the thread is interrupted while pausing
     */
    private void spiderWeather() throws Exception {
        File outputFile = resolveOutputFile();

        // Tell Selenium where the chromedriver binary lives.
        System.setProperty("webdriver.chrome.driver", CHROME_DRIVER_PATH);

        // NOTE(review): FileWriter encodes with the platform default charset; the
        // records contain Chinese text, so confirm that is the encoding wanted.
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(outputFile, true))) {
            ChromeDriver driver = new ChromeDriver();
            try {
                driver.get(HOME_URL);
                WebElement searchBox = driver.findElement(By.id("txtZip"));
                searchBox.sendKeys(SEARCH_KEYWORD);
                Thread.sleep(PAUSE_MILLIS);
                searchBox.sendKeys(Keys.ENTER);

                String searchWindow = driver.getWindowHandle();
                for (String handle : driver.getWindowHandles()) {
                    // Handles are Strings: compare by value, and read each one exactly once.
                    if (searchWindow.equals(handle)) {
                        continue;
                    }
                    // Timestamp is taken before the window is scraped.
                    String timestamp =
                            new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date());
                    driver.switchTo().window(handle);
                    writer.write(readWeatherRecord(driver, timestamp));
                    writer.flush();
                    // Close only this window; quit() would end the whole session
                    // and break any remaining iteration.
                    driver.close();
                }
            } finally {
                // End the browser session exactly once, on success or failure.
                driver.quit();
            }
        }
    }

    /**
     * Resolves the output file under the classpath root and makes sure its
     * directory exists. Falls back to the empty path (whose absolute form is used)
     * when the classpath root does not exist on disk.
     *
     * @return the file records are appended to
     * @throws IOException if the classpath root cannot be resolved
     */
    private File resolveOutputFile() throws IOException {
        File root = new File(ResourceUtils.getURL("classpath:").getPath());
        if (!root.exists()) {
            root = new File("");
        }
        File dataDir = new File(root.getAbsolutePath(), OUTPUT_DIR);
        if (!dataDir.exists()) {
            dataDir.mkdirs();
        }
        // Joining with File(parent, child) avoids a hard-coded path separator.
        return new File(dataDir, OUTPUT_FILE_NAME);
    }

    /**
     * Reads the four weather values from the window the driver is currently
     * focused on and formats them as one record.
     *
     * @param driver    driver already switched to the result window
     * @param timestamp formatted time to put on the record
     * @return the record text, terminated by a blank line
     * @throws InterruptedException if interrupted while pausing between clicks
     */
    private String readWeatherRecord(ChromeDriver driver, String timestamp)
            throws InterruptedException {
        // Scroll to the bottom of the page before looking up the chart.
        driver.executeScript("window.scrollTo(0, document.body.scrollHeight);");

        WebElement chart = driver.findElement(By.id("weatherChart"));
        List<WebElement> chartDivs = chart.findElements(By.tagName("div"));

        // First div: the first <ul> holds the tab buttons as <li> items.
        List<WebElement> tabs = chartDivs.get(0)
                .findElements(By.tagName("ul"))
                .get(0)
                .findElements(By.tagName("li"));
        // Second div: the <p> elements hold the displayed values.
        List<WebElement> values = chartDivs.get(1).findElements(By.tagName("p"));

        // Buttons sit at <li> indices 1..4. The rain value is read from <p> index 4
        // and the wind value from index 3, exactly as the indices were chosen originally.
        String temperature = clickAndRead(tabs.get(1), values.get(1));
        String humidity = clickAndRead(tabs.get(2), values.get(2));
        String rain = clickAndRead(tabs.get(3), values.get(4));
        String wind = clickAndRead(tabs.get(4), values.get(3));

        return "当前日期:" + timestamp
                + "\n气温:" + temperature
                + "\n相对湿度:" + humidity
                + "\n降水量:" + rain
                + "\n风力风向:" + wind
                + "\n\n";
    }

    /**
     * Clicks a tab button, reads the text of its value element, then pauses.
     *
     * @param button the tab to click
     * @param value  the element whose text is read
     * @return the text of {@code value} right after the click
     * @throws InterruptedException if interrupted while pausing
     */
    private String clickAndRead(WebElement button, WebElement value)
            throws InterruptedException {
        button.click();
        // NOTE(review): the text is read immediately after the click and the pause
        // comes afterwards; if the page updates the value asynchronously the pause
        // may need to move before getText() - confirm against the live page.
        String text = value.getText();
        Thread.sleep(PAUSE_MILLIS);
        return text;
    }
}