Web crawler examples: image lazy loading, simulated QQ Zone login, and Drug Administration scraping (selenium)

Image lazy loading

# Image lazy loading

from lxml import etree
import requests

# Scrape image titles and image URLs from the first four listing pages of
# sc.chinaz.com's "fengjingtupian" category and write them to fengjing.txt,
# one "title:url" pair per line.

# Desktop-Chrome User-Agent sent with every request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.87 Safari/537.36'
}

# URL template for pages 2 and up; page 1 uses a URL without the numeric
# suffix (handled in the loop below).
url = 'http://sc.chinaz.com/tupian/fengjingtupian_%d.html'

# The context manager closes the output file even if a request or a parse
# step raises part-way through the crawl.
with open('fengjing.txt', 'w', encoding='utf-8') as fp:
    for page in range(1, 5):
        if page == 1:
            new_url = 'http://sc.chinaz.com/tupian/fengjingtupian.html'
        else:
            new_url = url % page

        # A timeout keeps the script from hanging forever on a dead
        # connection; raise_for_status() surfaces HTTP errors instead of
        # silently parsing an error page.
        response = requests.get(url=new_url, headers=headers, timeout=10)
        response.raise_for_status()

        # Decode the body as UTF-8 once, instead of round-tripping every
        # title through ISO-8859-1 after requests has already decoded it.
        response.encoding = 'utf-8'
        tree = etree.HTML(response.text)

        for div in tree.xpath('//*[@id="container"]/div'):
            titles = div.xpath('./p/a/text()')
            # Lazy loading: the image address is read from the `src2`
            # attribute rather than `src` (presumably `src` is only filled
            # in by the page's script once the image scrolls into view).
            sources = div.xpath('./ div/a/img/@src2')

            # Skip entries that lack a title or an image instead of
            # crashing with IndexError.
            if not titles or not sources:
                continue

            fp.write(titles[0] + ':' + sources[0] + '\n')

Simulated QQ Zone (Qzone) login

from selenium import webdriver
from time import sleep

# Log in to Qzone through a Selenium-driven Chrome and print the page source
# that is available three seconds after submitting the login form.

# Specify the location of the ChromeDriver executable.
driver = webdriver.Chrome(executable_path='chromedriver.exe')

# try/finally guarantees the browser is shut down even when an element
# lookup or click fails half-way through the login flow.
try:
    url = 'https://qzone.qq.com/'
    driver.get(url)

    # The elements below are looked up inside the 'login_frame' frame, so
    # the driver has to switch into it first.
    driver.switch_to.frame('login_frame')

    # NOTE(review): 'switcher_plogin' presumably switches the form to
    # account/password login; confirm against the live page.
    m = driver.find_element_by_id('switcher_plogin')
    print(m)
    m.click()

    # The two strings are Chinese for "username" and "password"; presumably
    # placeholders to be replaced with real credentials.
    driver.find_element_by_id('u').send_keys('用户名')
    driver.find_element_by_id('p').send_keys('密码')

    driver.find_element_by_id('login_button').click()

    # Fixed pause before reading the page source after submitting the form.
    sleep(3)

    page_text = driver.page_source
    print(page_text)
finally:
    driver.quit()

# Drug Administration: use selenium to crawl the company names listed on the Drug Administration home page (pages 1-5)

from selenium import webdriver
from time import sleep

# NOTE(review): this snippet was reconstructed from text garbled by machine
# translation. The output file name, the progress message and the
# pagination element ids are best-effort readings of that text; confirm
# them against the original post / the live page.

# Specify the browser driver.
driver = webdriver.Chrome(executable_path=r'chromedriver.exe')

# try/finally guarantees the browser is shut down even if a lookup or click
# fails; quit() ends the whole WebDriver session, not just the window.
try:
    # Open the page with GET.
    driver.get('http://125.35.6.84:81/xk/')

    # The context manager closes the output file on any exit path.
    with open('Drug companies.txt', 'w', encoding='utf-8') as fp:
        for page in range(1, 6):
            # Page 1 is reached through the bare 'pageIto_first' element;
            # later pages are assumed to append the page number to that id.
            if page == 1:
                path = 'pageIto_first'
            else:
                path = f'pageIto_first{page}'
            driver.find_element_by_id(path).click()

            # Fixed pause after the click before reading the list.
            sleep(3)

            a_list = driver.find_elements_by_xpath('//*[@id="gzlist"]/li/dl/a')
            for a in a_list:
                msg = a.text
                fp.write(msg + '\n')
            print(f'completion of the page data acquired {page}')
finally:
    driver.quit()

 

Guess you like

Source: www.cnblogs.com/XLHIT/p/11316743.html