爬取金属价格

# -*- coding: utf-8 -*-


import importlib
import sys
import urllib
import urllib.request

import pandas as pd
import simplejson as json
from bs4 import BeautifulSoup

importlib.reload(sys)

# Scrape the metal price feeds published on custeel.com and export every
# record to a CSV file and an Excel workbook.

# Browser-like User-Agent sent with every request so that the crawler is
# presented to the server as an ordinary browser.
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 SE 2.X MetaSr 1.0',
}

# Target URLs: coloredPrice005001.json ... coloredPrice005005.json.
urls = [
    'http://www.custeel.com/reform/json/luliao/coloredPrice00500' + str(i) + '.json'
    for i in range(1, 6)
]

# Fields kept from each price record, in output column order.
PRICE_COLUMNS = ['JYSC', 'JYSNAME', 'PRICE', 'PRICE_CCL', 'PRICE_CJL', 'PRICE_H',
                 'PRICE_K', 'PRICE_L', 'PRICE_S', 'PRICE_UP', 'TIME', 'VARIETY', 'VNAME']

# Seconds to wait on the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 30


def _fetch_text(url):
    """Download *url* using the browser-like headers and return the body as text.

    The body is decoded as UTF-8; the response is closed as soon as it has
    been read.
    """
    request = urllib.request.Request(url, headers=header)
    with urllib.request.urlopen(request, timeout=REQUEST_TIMEOUT) as response:
        return response.read().decode('utf-8')


def _parse_records(text):
    """Return the JSON payload (an iterable of price records) found in *text*.

    The payload is parsed directly as JSON.  If that fails, the text is run
    through BeautifulSoup and the content of the first ``<p>`` element is
    parsed instead, which is how this script originally extracted the data.

    Raises:
        ValueError: if no JSON payload can be located or decoded.
    """
    try:
        return json.loads(text)
    except ValueError:
        # simplejson's JSONDecodeError is a ValueError subclass.
        # The parser is named explicitly: only lxml wraps bare text in <p>,
        # which is what the <p> lookup below relies on.
        paragraph = BeautifulSoup(text, 'lxml').find('p')
        if paragraph is None:
            raise ValueError('no JSON payload found in response')
        return json.loads(paragraph.text)


# Collect all records first and build the frame once: appending to a
# DataFrame row by row copies the whole frame every time, and
# DataFrame.append no longer exists in pandas >= 2.0.
rows = []
for url in urls:
    rows.extend(_parse_records(_fetch_text(url)))

# dtype=object keeps every value exactly as delivered by the feed; fields
# missing from a record are left empty.
js_price = pd.DataFrame(rows, columns=PRICE_COLUMNS, dtype=object)

js_price.to_csv("js_price.csv", sep=',')

# The context manager saves and closes the workbook (ExcelWriter.save() was
# removed in pandas 2.0).
# NOTE(review): the legacy .xls format needs the xlwt engine, which pandas
# dropped in 2.0 -- on newer pandas rename the file to .xlsx.
with pd.ExcelWriter('js_price.xls') as writer1:
    js_price.to_excel(writer1, sheet_name='Sheet1')


发布了348 篇原创文章 · 获赞 210 · 访问量 87万+

猜你喜欢

转载自blog.csdn.net/u010916338/article/details/103352246