使用Java-selenium自动化测试技术获取网页数据(以上交所公告信息及监管信息为例)

本文的数据库访问使用的是 spring-boot-data-jpa，不熟悉的同学可以先了解 spring-boot-data-jpa，再来阅读本文。

package com.frank.demo.etl.service.impl;



import java.io.File;
import java.util.Calendar;
import java.util.Date;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;


import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeDriverService;
import org.quartz.Job;
import org.quartz.JobExecutionContext;
import org.quartz.JobExecutionException;
import org.springframework.beans.factory.annotation.Autowired;


import com.frank.demo.etl.dao.EtlDataT1004Dao;
import com.frank.demo.etl.dao.EtlDataT1008Dao;
import com.frank.demo.etl.dao.EtlDataT1012Dao;
import com.frank.demo.etl.entity.EtlDataT1004;
import com.frank.demo.etl.entity.EtlDataT1008;
import com.frank.demo.etl.entity.EtlDataT1012;
import com.frank.demo.etl.service.EtlGeetestService;
import com.frank.demo.util.comm.util.DateUtil;


/**
 * Created: 2018-01-18 14:51:15
 *
 * @author Frank
 * @classExplain To understand this code, open the site
 *               (http://www.sse.com.cn/assortment/stock/list/share/) and read
 *               it side by side with the source code.
 */
public class EtlFromShStockJob2  {
// URL template of a company's info pages; the ##TYPE## and ##CODE## placeholders
// are substituted before the page is opened.
private static final String ETLURL = "http://www.sse.com.cn/assortment/stock/list/info/##TYPE##/index.shtml?COMPANY_CODE=##CODE##";
// Path of the chromedriver executable handed to ChromeDriverService.
private static final String SELENIUMURL = "src/test/resources/selenium/chromedriver.exe";
// Browser session used by every scraping method of this class.
private WebDriver driver;
// href of the first entry on the announcement page scraped last; the pager
// compares against it to detect that the next page has actually been rendered.
private String firstTile = "";
// Driver service that owns the chromedriver process.
private ChromeDriverService service;
// Source of the company records whose F1 value is used as the company code.
@Autowired
EtlDataT1004Dao etlDataT1004Dao;
// Storage for scraped announcements.
@Autowired
EtlDataT1008Dao etlDataT1008Dao;
// Storage for scraped regulatory information.
@Autowired
EtlDataT1012Dao etlDataT1012Dao;
// Helper used to wait for page elements to load.
@Autowired
EtlGeetestService etlGeetestService;



public string etl() {
List<EtlDataT1004> dataT1004s = new LinkedList<>();
String result = "ok";
try {
dataT1004s = etlDataT1004Dao.findAll();
Map<String, EtlDataT1004> dataMap = new LinkedHashMap<String, EtlDataT1004>();
// Start browser
service = new ChromeDriverService.Builder().usingDriverExecutable(new File(SELENIUMURL)).usingAnyFreePort().build();
service.start();
driver = new ChromeDriver(service);
for (EtlDataT1004 etlDataT1004 : dataT1004s) {
this.getAnnouncement(etlDataT1004.getF1());
}
this.getRegulatoryInformation();
driver.close();
service.stop();
} catch (Exception e) {
result = e.getMessage();

}

        return result ;

}


/**
 * Obtains the announcements of one company and stores them.
 *
 * <p>Opens the announcement page of {@code companyCode}, fills in the query
 * form (company code, start date, end date), walks through every result page
 * and builds one {@link EtlDataT1008} per {@code dd} entry. Afterwards the rows
 * already stored for the start date are deleted and the collected rows are
 * saved. Nothing is queried or stored when the query button never loads.
 *
 * @param companyCode company code substituted into the page URL, typed into
 *                    the query form and stored as F1 of every row
 * @throws Exception anything raised by Selenium, the DAOs, the date helpers or
 *                   {@code Thread.sleep}
 */
private void getAnnouncement(String companyCode) throws Exception {
    String etUrl = ETLURL.replace("##TYPE##", "announcement").replace("##CODE##", companyCode);
    driver.get(etUrl);
    By btn = By.id("btnQuery");
    boolean loader = etlGeetestService.waitElementLoad(driver, btn, 0);
    Thread.sleep(2000);
    if (!loader) {
        return;
    }

    By start = By.id("start_date");
    By end = By.id("end_date");
    String endDate = DateUtil.getNowDate();
    // The start date is the latest announcement date captured by the previous
    // cycle; if there is no record yet it defaults to roughly three years ago.
    String latelyDate = etlDataT1008Dao.findLatelyDate(companyCode);
    if (null != latelyDate && !"".equals(latelyDate)) {
        // Only compare once we know there is a stored date: the very first run
        // has none, and the comparison helper must not be fed null.
        if (DateUtil.compareDate(endDate, latelyDate)) {
            latelyDate = endDate;
        }
    } else {
        Calendar threeYearsAgo = Calendar.getInstance();
        threeYearsAgo.add(Calendar.YEAR, -3);
        threeYearsAgo.add(Calendar.DATE, 1);
        Date beforeDate = threeYearsAgo.getTime();
        latelyDate = DateUtil.getDate(beforeDate);
    }

    driver.findElement(By.id("inputCode")).sendKeys(companyCode);
    this.clickDate(start, latelyDate);
    Thread.sleep(500);
    this.clickDate(end, endDate);
    driver.findElement(btn).click();

    List<EtlDataT1008> etlDataT1008s = new LinkedList<>();
    By data = By.className("modal_pdf_list");
    boolean nextPage;
    do {
        boolean loadeData = etlGeetestService.waitElementLoad(driver, data, 0);
        if (loadeData) {
            List<WebElement> dataList = driver.findElement(data).findElements(By.tagName("dd"));
            if (dataList.size() > 0) {
                // Remember the first link of this page so getNextPage() can tell
                // when the following page has replaced it.
                firstTile = dataList.get(0).findElement(By.tagName("a")).getAttribute("href");
            }
            for (WebElement webElement : dataList) {
                try {
                    String date = webElement.findElement(By.tagName("span")).getText().trim();
                    // Look the anchor up once; both the URL and the title come from it.
                    WebElement link = webElement.findElement(By.tagName("a"));
                    String url = link.getAttribute("href").trim();
                    String title = link.getAttribute("title").trim();
                    EtlDataT1008 etlDataT1008 = new EtlDataT1008();
                    etlDataT1008.setF1(companyCode);
                    etlDataT1008.setF2(title);
                    etlDataT1008.setF3(url);
                    etlDataT1008.setF4(date);
                    etlDataT1008.setUpstatus("1");
                    etlDataT1008.setUptime(DateUtil.getNowDatetime());
                    etlDataT1008s.add(etlDataT1008);
                } catch (Exception e) {
                    // Deliberate best effort: a malformed entry is reported and
                    // skipped so that the remaining entries are still collected.
                    e.printStackTrace();
                }
            }
        }
        nextPage = this.getNextPage();
    } while (nextPage);

    // Rows of the start date are scraped again above, so drop the stored ones
    // first to avoid duplicates.
    etlDataT1008Dao.deleteByDate(companyCode, latelyDate);
    etlDataT1008Dao.save(etlDataT1008s);
}


/**
 * Obtains the regulatory information published on the SSE supervision page and
 * stores it.
 *
 * <p>Queries the range from the latest stored date (or 2013-01-01 when nothing
 * is stored yet) up to today, walks through every result page and builds one
 * {@link EtlDataT1012} per table row that has at least six cells. Once all
 * pages are read, the rows already stored for the start date are deleted and
 * the collected rows are saved. Nothing happens when the panel never loads.
 *
 * @throws Exception anything raised by Selenium, the DAO, the date helpers or
 *                   {@code Thread.sleep}
 */
private void getRegulatoryInformation() throws Exception {
    String etlUrl = "http://www.sse.com.cn/disclosure/credibility/supervision/measures/";
    driver.get(etlUrl);
    By by = By.id("panel-1");
    boolean load = etlGeetestService.waitElementLoad(driver, by, 0);
    Thread.sleep(2000);
    if (!load) {
        return;
    }

    By start = By.id("start_date");
    By end = By.id("end_date");
    String endDate = DateUtil.getNowDate();
    // The start date is the latest regulatory date captured by the previous
    // cycle; if there is no record yet, everything since 2013-01-01 is fetched.
    String latelyDate = etlDataT1012Dao.findLatelyDate();
    if (null == latelyDate || latelyDate.equals("")) {
        latelyDate = "2013-01-01";
    }
    this.clickDate(start, latelyDate);
    Thread.sleep(500);
    this.clickDate(end, endDate);
    driver.findElement(By.id("btnQuery")).click();
    Thread.sleep(2000);

    By data = By.tagName("table");
    By tr = By.tagName("tr");
    By td = By.tagName("td");
    List<EtlDataT1012> etlDataT1012s = new LinkedList<>();
    boolean nexpage;
    do {
        // findElement throws when nothing matches instead of returning null,
        // so a single lookup is enough here.
        WebElement dataElement = driver.findElement(by).findElement(data);
        for (WebElement webElement : dataElement.findElements(tr)) {
            List<WebElement> tdElements = webElement.findElements(td);
            if (tdElements.size() < 6) {
                // Rows with fewer than six cells are skipped.
                continue;
            }
            EtlDataT1012 etlDataT1012 = new EtlDataT1012();
            etlDataT1012.setF1(tdElements.get(0).getText());
            etlDataT1012.setF2(tdElements.get(5).getText());
            etlDataT1012.setF3(tdElements.get(2).getText());
            etlDataT1012.setF4(tdElements.get(4).getText());
            etlDataT1012.setF5(tdElements.get(3).getText());
            // Strings must be compared with equals(); the previous reference
            // comparison never matched, so F6 was never filled in.
            if ("isClickTr".equals(webElement.getAttribute("class"))) {
                List<WebElement> links = tdElements.get(3).findElements(By.tagName("a"));
                if (!links.isEmpty()) {
                    etlDataT1012.setF6(links.get(0).getAttribute("href"));
                }
            }
            etlDataT1012.setUpstatus("1");
            etlDataT1012.setUptime(DateUtil.getNowDatetime());
            etlDataT1012s.add(etlDataT1012);
        }
        nexpage = this.getNextPage2();
    } while (nexpage);

    // Delete the stored rows of the start date only after scraping succeeded,
    // right before saving their replacements, so a failure while reading the
    // pages does not leave that date without data.
    etlDataT1012Dao.deleteByDate(latelyDate);
    etlDataT1012Dao.save(etlDataT1012s);
}


/**
 * Clicks the "next page" control of the regulatory information table.
 *
 * <p>The control is taken to be the anchor inside the last {@code li} of the
 * pager; it is clicked only when the pager has more than three {@code li}
 * items and the anchor's class is not {@code disable}. After a click the method
 * pauses for two seconds so the next page can render.
 *
 * @return {@code true} if the next-page control was clicked, {@code false}
 *         when there is no further page
 */
private boolean getNextPage2() {
    boolean click = false;
    By page = By.className("page-con-table");
    List<WebElement> pages = driver.findElement(page).findElements(By.tagName("li"));
    if (pages.size() > 3) {
        WebElement nextElement = pages.get(pages.size() - 1).findElement(By.tagName("a"));
        // Constant-first equals: getAttribute returns null when the anchor
        // carries no class attribute at all.
        if (!"disable".equals(nextElement.getAttribute("class"))) {
            nextElement.click();
            click = true;
            try {
                Thread.sleep(2000);
            } catch (InterruptedException e) {
                // Keep the interrupt visible to the caller instead of losing it.
                Thread.currentThread().interrupt();
            }
        }
    }
    return click;
}


/**
 * Clicks the "next page" control of the announcement list and waits until the
 * list content has changed.
 *
 * <p>The control is taken to be the anchor inside the last {@code li} of the
 * pager; it is clicked only when the pager has more than three {@code li}
 * items and the anchor's class is not {@code disable}. After the click the
 * first link of the list is polled until its href differs from
 * {@code firstTile}, i.e. from the first link of the page scraped before.
 *
 * @return {@code true} if the next-page control was clicked, {@code false}
 *         when there is no further page
 */
private boolean getNextPage() {
    boolean click = false;
    By page = By.className("page-con-table");
    List<WebElement> pages = driver.findElement(page).findElements(By.tagName("li"));
    if (pages.size() > 3) {
        WebElement nextElement = pages.get(pages.size() - 1).findElement(By.tagName("a"));
        // Constant-first equals: getAttribute returns null when the anchor
        // carries no class attribute at all.
        if (!"disable".equals(nextElement.getAttribute("class"))) {
            nextElement.click();
            click = true;
            By data = By.className("modal_pdf_list");
            String chageTile = "";
            // NOTE(review): this poll has no upper bound; if the list never
            // changes after the click it spins forever. Consider a timeout.
            do {
                etlGeetestService.waitElementLoad(driver, data, 0);
                try {
                    List<WebElement> dataList = driver.findElement(data).findElements(By.tagName("dd"));
                    if (dataList.size() > 0) {
                        chageTile = dataList.get(0).findElement(By.tagName("a")).getAttribute("href");
                    }
                } catch (Exception e) {
                    // The list could not be read (for example while it is being
                    // re-rendered): give the page two seconds, then stop polling.
                    try {
                        Thread.sleep(2000);
                    } catch (InterruptedException e1) {
                        // Keep the interrupt visible to the caller.
                        Thread.currentThread().interrupt();
                    }
                    break;
                }
            } while (chageTile.equals(firstTile));
        }
    }
    return click;
}


/**
 * Picks a date in the page's date-picker widget.
 *
 * <p>Clicks the input located by {@code date} to open the picker, clicks the
 * "switch" header of the visible day panel and then of the visible month panel
 * (presumably zooming out to the year panel - confirm against the widget), and
 * then clicks the matching year, month and day cells in that order.
 *
 * @param date       locator of the date input that opens the picker
 * @param selectDate date to select, formatted like {@code 2017-01-24}
 * @throws IllegalArgumentException if {@code selectDate} is null or does not
 *                                  consist of at least three "-" separated parts
 * @throws Exception anything raised by Selenium or the date helper
 */
private void clickDate(By date, String selectDate) throws Exception {
    if (selectDate == null) {
        throw new IllegalArgumentException("selectDate must not be null");
    }
    // Parse once up front instead of re-splitting inside every loop iteration.
    String[] dateParts = selectDate.split("-");
    if (dateParts.length < 3) {
        throw new IllegalArgumentException("selectDate must look like yyyy-MM-dd but was: " + selectDate);
    }
    String targetYear = dateParts[0];
    String targetDay = dateParts[2];

    By years = By.className("datetimepicker-years");
    By months = By.className("datetimepicker-months");
    By days = By.className("datetimepicker-days");
    WebElement monthElement = null;
    WebElement dayElement = null;

    driver.findElement(date).click();

    // NOTE(review): the two waits below have no upper bound; they spin until a
    // visible panel shows up. Consider a timeout.
    do {
        for (WebElement dayPanel : driver.findElements(days)) {
            if (dayPanel.isDisplayed()) {
                dayElement = dayPanel;
                dayPanel.findElement(By.className("switch")).click();
                break;
            }
        }
    } while (null == dayElement);

    do {
        for (WebElement monthPanel : driver.findElements(months)) {
            if (monthPanel.isDisplayed()) {
                monthElement = monthPanel;
                monthPanel.findElement(By.className("switch")).click();
                break;
            }
        }
    } while (null == monthElement);

    // Year: only the first displayed year panel is considered.
    for (WebElement yearPanel : driver.findElements(years)) {
        if (yearPanel.isDisplayed()) {
            for (WebElement yearCell : yearPanel.findElements(By.tagName("span"))) {
                if (yearCell.getText().trim().equals(targetYear)) {
                    yearCell.click();
                    break;
                }
            }
            break;
        }
    }

    // Month: the helper turns the numeric month into the label shown by the picker.
    String targetMonth = DateUtil.convertoMonth(dateParts[1]);
    for (WebElement monthCell : monthElement.findElements(By.tagName("span"))) {
        if (monthCell.getText().trim().equals(targetMonth)) {
            monthCell.click();
            break;
        }
    }

    // Day: only cells rendered in one of these two colours are candidates
    // (presumably the days of the shown month and the selected day - confirm).
    for (WebElement dayCell : dayElement.findElements(By.className("day"))) {
        String color = dayCell.getCssValue("color");
        if ("rgba(51, 51, 51, 1)".equals(color) || "rgba(255, 255, 255, 1)".equals(color)) {
            String dnum = dayCell.getText().trim();
            if (dnum.length() == 1) {
                // The picker shows "1".."9"; the target uses two digits.
                dnum = "0" + dnum;
            }
            if (dnum.equals(targetDay)) {
                dayCell.click();
                break;
            }
        }
    }
}


}


喜欢的朋友可以关注我的个人微信公众号，我会同步更新相关技术文章，二维码见下图。


萌萌技术

猜你喜欢

转载自blog.csdn.net/u014267900/article/details/79308054