练习:用正则表达式筛选抓取到的网页内容,并将结果写入MySQL数据库

import pymysql
import re
import time

class CatchStockIndexData(object):
    """Parse index quote values out of an HTML snippet and store them in MySQL.

    One instance describes a single stock index (name and code) together with
    the raw markup its values are extracted from.  ``run()`` chains the three
    steps: extract the values, assemble the row, insert the row.
    """

    # Text sitting directly between ">" and "<" that contains no colon and no
    # whitespace (labels such as "xx:" are therefore skipped).  "<" and ">" are
    # excluded as well so a value can never swallow neighbouring tags when the
    # markup has no whitespace between cells, e.g. "<td>1</td><td>2</td>".
    # NOTE: the previous literal was two adjacent strings, so the quote
    # characters that appeared to be in the class were never part of the
    # pattern; they are intentionally not excluded here either.
    _VALUE_PATTERN = re.compile(r">([^:\s<>]+)<")

    def __init__(self, fund_data, index_name, index_code):
        """Remember the markup to parse and the index it belongs to.

        Args:
            fund_data: HTML text containing the quote values.
            index_name: Display name of the index.
            index_code: Code of the index.
        """
        # Always 0; presumably lets an auto-increment column assign the real
        # id -- TODO confirm against the table schema.
        self.id = 0
        self.index_name = index_name
        self.index_code = index_code
        self.fund_data = fund_data

    def re_handle_funddata(self):
        """Return every tag-enclosed value found in ``self.fund_data``.

        Returns:
            list[str]: the matched values in document order.
        """
        return self._VALUE_PATTERN.findall(self.fund_data)

    def combine_fund_datalist(self, catched_data):
        """Build the row to insert: id, code, name, the values, a timestamp.

        Args:
            catched_data: Iterable of extracted values (any iterable works,
                not only a list).

        Returns:
            list: ``[id, index_code, index_name, *catched_data, time.ctime()]``.
        """
        return [
            self.id,
            self.index_code,
            self.index_name,
            *catched_data,
            time.ctime(),
        ]

    def thread_data_to_mysql(self, combined_data_list):
        """Insert one row into ``stock_information`` and commit it.

        Args:
            combined_data_list: Sequence of nine values, one per placeholder
                in the INSERT statement.

        Raises:
            Any exception raised by pymysql while connecting, executing or
            committing is propagated; the cursor and the connection are
            closed in every case.
        """
        conn = pymysql.connect(host='localhost', port=3306, user='root', password='mysql', database='stock_info', charset='utf8')
        try:
            cc = conn.cursor()
            try:
                sql = """insert into stock_information value(%s,%s,%s,%s,%s,%s,%s,%s,%s)"""
                cc.execute(sql, combined_data_list)
                conn.commit()
                print("数据插入成功")
            finally:
                # Release the cursor even when execute/commit fails.
                cc.close()
        finally:
            # Closing without a commit discards the uncommitted insert.
            conn.close()

    def run(self):
        """Extract the values, assemble the row and write it to the database."""
        # 1. Pull the quote values out of the markup.
        catched_data = self.re_handle_funddata()
        # 2. Combine them with the index identity and a timestamp.
        combined_data_list = self.combine_fund_datalist(catched_data)
        # 3. Persist the combined row.
        self.thread_data_to_mysql(combined_data_list)
 
if __name__ == "__main__":
    # Hand-copied snapshot of the quote table markup; the same snippet is fed
    # to every tracker created below.
    fund_data = """<tr>
                <td>今开:</td><td id="gt1" class="txtl red">3134.75</td>
                <td>最高:</td><td id="gt2" class="txtl red">3138.46</td>
                <td>涨跌幅:</td><td id="gt3" class="txtl green">-0.43%</td>
                <td>换手:</td><td id="gt4" class="txtl">0.47%</td>
                <td>成交量:</td><td id="gt5" class="txtl">1.65亿手</td>
            </tr>"""

    # (index name, index code) for each index set up in this demo.
    index_table = [
        ("上证50", "000016"),
        ("创业板指数", "399006"),
        ("沪深300", "000300"),
        ("A股指数", "000002"),
    ]
    trackers = {
        name: CatchStockIndexData(fund_data, index_name=name, index_code=code)
        for name, code in index_table
    }

    # Only the A-share index row is actually written to the database.
    trackers["A股指数"].run()
    
    


备注:示例中的网页内容是手动复制进代码的,并非程序自动抓取,好low(≧▽≦)/

发布了55 篇原创文章 · 获赞 0 · 访问量 2068

猜你喜欢

转载自blog.csdn.net/KathyLJQ/article/details/102936741
今日推荐