"""Selenium cheat sheet: scraping dynamically rendered pages.

Kept for reference only.  The numbered sections are short snippets that
drive a real browser; they need a matching WebDriver binary and network
access.  Where a snippet depends on page state that is not set up in this
file, a ``NOTE(review)`` comment says so.

Element lookups use ``find_element(By.<STRATEGY>, value)``.  The older
``find_element_by_*`` helpers do the same thing but were removed in
Selenium 4.3, so they are only mentioned in comments.
"""

# 1) Imports
import time

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

# 2) Declare the browser object
# Every constructor launches a browser, so only one is created here.
browser = webdriver.Chrome()
# Alternatives (use one instead of Chrome):
#   browser = webdriver.Firefox()
#   browser = webdriver.Edge()
#   browser = webdriver.Safari()
#   browser = webdriver.PhantomJS()  # legacy; not shipped with Selenium 4

# 3) Visit a page
browser.get('https://www.jd.com')
# The session stays open because sections 4-8 keep using it; it is closed
# at the end of section 8.

# 4) Find nodes
# Single node -- three equivalent ways to locate the element with id "q".
# Legacy spelling of the first one: browser.find_element_by_id('q')
input_first = browser.find_element(By.ID, 'q')
# '#q' is a CSS selector, so it needs the CSS strategy, not CLASS_NAME.
input_second = browser.find_element(By.CSS_SELECTOR, '#q')
input_third = browser.find_element(By.XPATH, '//*[@id="q"]')
# Multiple nodes -- find_elements returns a list.
lis = browser.find_elements(By.CSS_SELECTOR, '.service-bd li')

# 5) Interact with nodes
search_box = browser.find_element(By.ID, 'q')
search_box.send_keys('iPhone')
button = browser.find_element(By.CLASS_NAME, 'btn-search')
button.click()

# 6) Action chains
url = 'http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable'
browser.get(url)
browser.switch_to.frame('iframeResult')
source = browser.find_element(By.CSS_SELECTOR, '#draggable')
target = browser.find_element(By.CSS_SELECTOR, '#droppable')
actions = ActionChains(browser)
actions.drag_and_drop(source, target)
actions.perform()  # queued actions only run when perform() is called

# 7) Execute JavaScript
browser.execute_script('window.scrollTo(0, document.body.scrollHeight)')
browser.execute_script('alert("To Bottom")')
# NOTE(review): the alert opened above is never accepted or dismissed in
# these notes; handle it (e.g. browser.switch_to.alert.accept()) before
# doing further lookups in the same session.

# 8) Read node information
# NOTE(review): the locators below look like they target
# https://www.zhihu.com/explore (the page loaded in section 10); no
# navigation to it happens here -- confirm and load the page first.
# Attributes
logo = browser.find_element(By.ID, 'zh-top-link-logo')
print(logo)
print(logo.get_attribute('class'))
# Text content
ask_button = browser.find_element(By.CLASS_NAME, 'zu-top-add-question')
print(ask_button.text)
# Id, location, tag name and size
ask_button = browser.find_element(By.CLASS_NAME, 'zu-top-add-question')
print(ask_button.id)
print(ask_button.location)
print(ask_button.tag_name)
print(ask_button.size)
# Done with the session opened in section 2.
browser.close()

# 9) Switch frames
browser = webdriver.Chrome()
url = 'https://mail.163.com/'
browser.get(url)
browser.switch_to.frame('scoreIndexPopIfm')
try:
    logo = browser.find_element(By.ID, 'scoreIndexPop')
except NoSuchElementException:
    print('NO LOGO')
# Go back to the parent frame and look the element up there.
browser.switch_to.parent_frame()
logo = browser.find_element(By.ID, 'scoreIndexPop')
print(logo)
print(logo.text)
# End this session before the next section starts a new browser.
browser.quit()

# 10) Waiting
# Implicit wait (mind the placement: it is set right after the driver is
# created, before the page load and the lookup).
browser = webdriver.Chrome()
browser.implicitly_wait(10)
url = 'https://www.zhihu.com/explore'
browser.get(url)
ask_button = browser.find_element(By.CLASS_NAME, 'zu-top-add-question')
print(ask_button)
browser.quit()

# Explicit wait
browser = webdriver.Chrome()
browser.get('https://www.jd.com')
wait = WebDriverWait(browser, 10)
# presence_of_element_located yields one element; the *_all_* variant
# would return a list instead.
search_box = wait.until(EC.presence_of_element_located((By.ID, 'key')))
button = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '.button')))
print(search_box, button)
# Wait conditions:
#   presence_of_element_located: the node has been loaded; takes a locator
#       tuple such as (By.ID, 'p').
#   element_to_be_clickable: the node is clickable; also takes a locator
#       tuple.

# 11) Back and forward (reuses the browser from the explicit-wait snippet)
browser.get('https://www.baidu.com')
browser.get('https://www.jd.com')
browser.get('https://www.python.org')
browser.back()
time.sleep(1)
browser.forward()
browser.close()

# 12) Cookies: read, add, delete
browser = webdriver.Firefox()
browser.get('https://www.zhihu.com/explore')
print(browser.get_cookies())
browser.add_cookie({'name': 'name', 'domain': 'www.zhihu.com', 'value': 'germy'})
print(browser.get_cookies())
browser.delete_all_cookies()
print(browser.get_cookies())  # the cookies are now cleared

# 13) Tab management (continues with the Firefox session from section 12)
browser.execute_script('window.open()')  # open a new tab
# window_handles lists the handles of all currently open tabs.
print(browser.window_handles)
browser.switch_to.window(browser.window_handles[1])  # switch to the second tab
browser.get('https://www.taobao.com')  # load Taobao in the second tab
time.sleep(1)
browser.switch_to.window(browser.window_handles[0])  # switch to the first tab
browser.get('https://www.python.org')  # load python.org in the first tab
# Two tabs are open, so quit() is used to end the whole session.
browser.quit()

# 14) Exception handling
browser = webdriver.Chrome()
try:
    browser.get('https://www.baidu.com')
except TimeoutException:
    print('Time out')
try:
    browser.find_element(By.ID, 'hello')
except NoSuchElementException:
    print('No Element')
finally:
    browser.close()