Python 爬虫：将所爬到的数据保存在文件中

Python 爬虫,将所爬到的数据保存在.txt文件中

import urllib.request
import re
# Search-result page that the salary data is scraped from.
URL = "https://search.51job.com/list/010000%252C020000%252C030200%252C040000,000000,0000,00,9,99,python,2,1.html"

# Destination of the tab-separated output.
OUTPUT_PATH = "D:\\1.txt"

# The regular expression is the key to extracting the right data.  It captures
# five groups per listing: (city, low, high, magnitude unit, pay period).
# Written as raw strings so that \s, \d, \. and \w reach the regex engine
# without relying on Python's deprecated handling of unknown string escapes.
SALARY_PATTERN = re.compile(
    r'<span class="t3">(北京|上海|广州|深圳).*</span>\s*'
    r'<span class="t4">(\d*\.?\d*)-(\d*\.?\d*)(\w)/(.*)</span>'
)


def to_wan_per_month(low, high, magnitude, period):
    """Convert a salary range to the unit written to the file ("万/月").

    Args:
        low: Lower bound of the range, as a float.
        high: Upper bound of the range, as a float.
        magnitude: Magnitude character captured from the page; "千" causes
            both bounds to be divided by 10.
        period: Pay-period text captured from the page; "年" causes both
            bounds to be divided by 12.

    Returns:
        A ``(low, high)`` tuple of converted floats.  Any other magnitude or
        period leaves the values unchanged, exactly as the original script did.
    """
    if magnitude == "千":
        low /= 10
        high /= 10
    if period == "年":
        low /= 12
        high /= 12
    return low, high


def extract_rows(html):
    """Build the output lines for every salary listing found in *html*.

    Args:
        html: Decoded page source to search with ``SALARY_PATTERN``.

    Returns:
        A list of newline-terminated, tab-separated lines:
        ``Python<TAB>city<TAB>low<TAB>high<TAB>万/月<TAB>``.
    """
    rows = []
    for city, low, high, magnitude, period in SALARY_PATTERN.findall(html):
        try:
            low_value = float(low)
            high_value = float(high)
        except ValueError:
            # The pattern allows an empty or lone-"." number (\d*\.?\d*);
            # skip such a listing instead of aborting the whole run.
            continue
        low_value, high_value = to_wan_per_month(
            low_value, high_value, magnitude, period
        )
        rows.append(
            "%s\t%s\t%.2f\t%.2f\t%s\t\n"
            % ("Python", city, low_value, high_value, "万/月")
        )
    return rows


def main():
    """Download the listing page, extract salaries and write them to disk."""
    # The context manager closes the HTTP response even if decoding fails.
    with urllib.request.urlopen(URL) as response:
        # The script decodes the downloaded bytes as GBK.
        html = response.read().decode("GBK")

    rows = extract_rows(html)

    # An explicit encoding keeps the Chinese text writable regardless of the
    # platform's default codec; `with` guarantees the file is closed.
    with open(OUTPUT_PATH, "w", encoding="utf-8") as out:
        out.writelines(rows)


if __name__ == "__main__":
    main()

猜你喜欢

转载自blog.csdn.net/qq_42980122/article/details/84099691