# 爬取全国城市近5、6年来空气质量情况
# (Scrape air-quality history for cities nationwide over roughly the last 5-6 years.)

import urllib.request
import urllib.parse
import requests
import csv
from lxml import etree
from selenium import webdriver
import time

# Index page from which the list of city names is scraped.
url = 'https://www.aqistudy.cn/historydata/index.php'
# A timeout keeps the script from hanging forever on a stalled connection, and
# raise_for_status() stops early instead of silently parsing an HTTP error page
# (which would just yield an empty city list).
response = requests.get(url, timeout=30)
response.raise_for_status()
html = etree.HTML(response.text)
# Text of every city link; the absolute XPath is tied to the page's current
# layout and returns an empty list if that layout changes.
city = html.xpath('/html/body/div[3]/div/div[1]/div[2]/div[2]/ul/div[2]/li/a/text()')
# Prefix of the per-city monthly-data page; the URL-quoted city name is appended.
urls = 'https://www.aqistudy.cn/historydata/monthdata.php?city='
# CSV header. Each data row written by the loop below holds the city name plus
# 10 table cells (11 values), so the header needs 11 labels; the original had
# only 10, shifting every column after the month.
# NOTE(review): 'AQI' is assumed to be the table's second column - confirm
# against the live monthdata table.
listw = ['名字', '月份', 'AQI', '范围', '质量等级', 'PM2.5', 'PM10', 'SO2', 'CO', 'NO2', 'O3']
liste = []

# For every city: open its monthly-data page in a fresh Chrome session and
# append the table contents to a per-city CSV file on the desktop.
for i in city:
    # The city name is percent-encoded before being placed in the query string.
    urls1 = urls + urllib.parse.quote(i)
    broswer = webdriver.Chrome()
    try:
        broswer.get(urls1)
        # Fixed pause before reading the table - presumably to give the page's
        # scripts time to render it.
        time.sleep(3)
        # Open the output file once per city rather than once per row.
        with open('C:\\Users\\lenovo\\Desktop\\{}.csv'.format(i), 'a+', newline='', encoding='utf-8') as f:
            writers = csv.writer(f)
            writers.writerow(listw)
            # find_elements(by, value) exists in both Selenium 3 and 4, whereas
            # find_elements_by_xpath was removed in Selenium 4.3.
            c = broswer.find_elements('xpath', '//td[@align="center"]')
            # Cells are consumed in groups of 10 per table row; stopping at
            # len(c) - 9 skips an incomplete trailing group instead of raising
            # IndexError.
            for j in range(0, len(c) - 9, 10):
                liste = [i] + [cell.text for cell in c[j:j + 10]]
                writers.writerow(liste)
    finally:
        # Always close the browser, even when loading or parsing fails, so
        # Chrome processes do not pile up across cities.
        broswer.quit()

# 猜你喜欢 (blog "you may also like" footer)
#
# 转载自 (reposted from): www.cnblogs.com/persistence-ok/p/11029227.html