1 # -*- coding=utf-8 -*-
import io
import time

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, WebDriverException
from selenium.webdriver import *
5
6
# Scrape the used-car listing pages starting at `url` and append one
# "title,info,price" line per listing to a GBK-encoded CSV file, following
# the "next" link until it can no longer be clicked.

# Request headers: copy the stock PhantomJS capabilities and override the
# user agent with a desktop Chrome string.
dcap = dict(DesiredCapabilities.PHANTOMJS)
dcap['phantomjs.page.settings.userAgent'] = (
    'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36'
)
url = 'https://www.guazi.com/www/buy/i7/'

# Raw string so the backslashes of the Windows path are never treated as
# escape sequences (e.g. "\U").
OUTPUT_PATH = r"C:\Users\Administrator\Desktop\guazi.csv"
# Listing rows are addressed by 1-based position; positions 1..40 are probed
# on every page.
MAX_ROWS_PER_PAGE = 40
# Absolute XPath of the anchor wrapping one listing row; "{}" is the position.
ROW_XPATH = "/html/body/div[4]/ul/li[{}]/a"

# The capabilities must be handed to the driver explicitly, otherwise the
# user-agent override above is never applied.
driver = webdriver.PhantomJS(desired_capabilities=dcap)
try:
    driver.get(url)
    # Open the output once and let the file object do the GBK encoding, so
    # all text handling inside the loop stays in unicode.
    with io.open(OUTPUT_PATH, 'a', encoding='gbk') as f:
        while True:
            for i in range(1, MAX_ROWS_PER_PAGE + 1):
                row = ROW_XPATH.format(i)
                try:
                    title = driver.find_element_by_xpath(row + "/h2").text
                    info = driver.find_element_by_xpath(
                        row + "/div[@class='t-i']").text
                    price = driver.find_element_by_xpath(
                        row + "/div[2]/p").text
                except NoSuchElementException:
                    # Position not present on this page (short page) or the
                    # row lacks one of the three fields: skip it instead of
                    # aborting the whole run.
                    continue
                print(u"正在保存数据 ------" + title)
                f.write(u'{},{},{}\n'.format(title, info, price))
            try:
                driver.find_element_by_class_name("next").click()
            except WebDriverException:
                # No "next" link, or it cannot be clicked: treat this as the
                # last page. Only Selenium errors end the loop; interrupts
                # and programming errors still propagate.
                break
            # Pause after navigating before reading the next page.
            time.sleep(1.5)
finally:
    # Always shut the PhantomJS process down, even when scraping fails.
    driver.quit()