.NET DotnetSpider--WebDriverSpider(AJAX动态加载的数据获取)

   爬虫获取数据时,可能会遇到AJAX加载的页面,如果无法分析出接口的话,就只能使用秘密武器——WebDriverDownloader。不过最好还是分析出接口为好,WebDriver的性能实在是太低了。

            #region WebDriverDownloader
            // Browser options: skip images and Flash, and run the browser headless.
            var browserOptions = new Option
            {
                LoadImage = false,
                LoadFlashPlayer = false,
                AlwaysLoadNoFocusLibrary = false,
                Headless = true
            };

            // Chain of WebDriver actions; here it holds a single Click action.
            var postLoadActions = new List<IWebDriverAction> { new Click() };

            // Chrome-backed downloader configured with the options above.
            var webDriverDownloader = new WebDriverDownloader(Browser.Chrome, 5000, browserOptions);

            // Attach the actions to the downloader; per the original author's note
            // they are executed once the browser has finished loading the page.
            webDriverDownloader.Actions = postLoadActions;

            spider.Downloader = webDriverDownloader;
            #endregion
    /// <summary>
    /// WebDriver action that runs a JavaScript snippet in the browser a configurable
    /// number of times. The snippet calls <c>window.open</c> on a fixed URL.
    /// </summary>
    public class Click : IWebDriverAction
    {
        /// <summary>
        /// Number of times the script is executed. Defaults to 1.
        /// </summary>
        public int ClickTimes { get; set; } = 1;

        /// <summary>
        /// Executes the script <see cref="ClickTimes"/> times, sleeping 2 seconds
        /// before and 1 second after each execution.
        /// </summary>
        /// <param name="webDriver">The WebDriver instance used to execute the script.</param>
        /// <returns>
        /// <c>true</c> if every iteration completed without throwing;
        /// <c>false</c> if any exception was raised.
        /// </returns>
        public bool Invoke(RemoteWebDriver webDriver)
        {
            // The inner double quotes must be escaped; left unescaped they end the
            // C# string literal early and the class fails to compile.
            const string script = "window.open(\"https://www.baidu.com/\");";

            try
            {
                // Optional, currently disabled: maximize the browser window first.
                //webDriver.Manage().Window.Maximize();
                for (var i = 0; i < ClickTimes; i++)
                {
                    // Pause before running the script.
                    Thread.Sleep(2000);
                    webDriver.ExecuteScript(script);
                    // Pause after running the script.
                    Thread.Sleep(1000);
                }
            }
            catch (Exception)
            {
                // Best-effort: any failure is reported to the caller as false.
                return false;
            }
            return true;
        }
    }

猜你喜欢

转载自www.cnblogs.com/TTonly/p/10346236.html
今日推荐