Scraping data with Python, BeautifulSoup, Selenium and MySQLdb

# coding=utf-8
'''
Created on 2017-02-20

@author: chenkai
'''
import MySQLdb
import sys
from selenium import webdriver
from bs4 import BeautifulSoup
from selenium.webdriver.common.by import By
import time
from selenium.webdriver.remote import webelement
from selenium.webdriver.remote.webelement import WebElement
'''
Database connection helpers
'''
def getConn(host='127.0.0.1', user='root', passwd='123456', port=3306,
            charset="utf8"):
    """Open and return a MySQL connection.

    All parameters default to the values this script originally hard-coded,
    so calling ``getConn()`` with no arguments behaves as before.

    Args:
        host: Address of the MySQL server.
        user: Account name used to log in.
        passwd: Password for ``user``.
        port: TCP port of the MySQL server.
        charset: Character set requested for the connection.

    Returns:
        The connection object returned by ``MySQLdb.connect``.

    Raises:
        MySQLdb.Error: Propagated from ``MySQLdb.connect`` when the
            connection cannot be established.
    """
    # Keyword arguments keep each value bound to the right option; the
    # original assigned ``port`` but then passed a repeated literal instead.
    return MySQLdb.connect(host=host, user=user, passwd=passwd, port=port,
                           charset=charset)
def getCursor(mysqlConn):
    """Return a new cursor created from the connection ``mysqlConn``."""
    cursor = mysqlConn.cursor()
    return cursor
def closeDBConnandCur(cur, mysqlConn):
    """Close the cursor, commit pending work, and close the connection.

    Args:
        cur: Cursor to close.
        mysqlConn: Connection to commit and then close.

    Raises:
        Any exception raised by ``cur.close()`` or ``mysqlConn.commit()`` is
        propagated, but only after the connection has been closed.
    """
    try:
        cur.close()
        # Commit before closing the connection so the executed statements
        # are actually persisted.
        mysqlConn.commit()
    finally:
        # Always release the connection, even if closing the cursor or
        # committing failed.
        mysqlConn.close()
# Connect to the database.
mysqlConn = getConn()
# Get a cursor from the connection.
cur = getCursor(mysqlConn)
# Select the "test" database for the statements that follow.
cur.execute("use test")

'''
Browser
'''
# Scrape the shop list from the page and store one row per shop in
# k_bdnm_shopinfo. The outer try/finally guarantees the cursor and the
# database connection are closed even if the browser fails to start or the
# scrape aborts part-way.
try:
    options = webdriver.ChromeOptions()
    # The key must be exactly "excludeSwitches"; the original had a trailing
    # space and therefore did not name that option.
    options.add_experimental_option("excludeSwitches", ["ignore-certificate-errors"])
    # Raw string so the backslashes in the Windows path are never read as
    # escape sequences.
    driver = webdriver.Chrome(
        executable_path=r"C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe",
        chrome_options=options)  # Launch the Chrome browser
    try:
        driver.get('https://sanya.nuomi.com/326')
        # To click the "next" button:
        # driver.find_element_by_class_name("next-btn").click()
        page = driver.page_source

        # page_source is already decoded text, so no from_encoding hint is
        # passed (BeautifulSoup ignores it for Unicode markup).
        soup = BeautifulSoup(page, 'html.parser')
        div_list = soup.find_all("div", class_="contentbox")

        index = 1001
        for con in div_list:
            link = con.a
            title = con.h4
            href = link.get("href") if link is not None else None
            if href is None or title is None:
                # Skip boxes that lack a link or a title instead of crashing
                # on a None attribute access.
                continue
            index += 1  # The first stored id is 1002, as in the original.
            # Encode to UTF-8 so the text is not garbled once stored in MySQL.
            shopUrl = ("https:" + href).encode('utf-8')
            shopName = title.get_text().encode('utf-8')
            print("%s %s" % (shopUrl, shopName))
            print('insert into  k_bdnm_shopinfo values(%d,%s,%s)' % (index, shopUrl, shopName))
            try:
                # Let the driver quote the scraped values: they come from an
                # external page and may contain quotes or SQL fragments.
                cur.execute(
                    "insert into  k_bdnm_shopinfo values(%s,%s,%s)",
                    (index, shopUrl, shopName))
            except MySQLdb.Error as e:
                # Report the failed row and carry on with the remaining shops.
                print("Mysql Error %d: %s" % (e.args[0], e.args[1]))
    finally:
        driver.quit()
finally:
    closeDBConnandCur(cur, mysqlConn)  # Close the cursor and the database connection

'''
Table schema
'''
-- Table that the scraping script inserts into: one row per scraped shop.
-- Columns are listed in the order the script's INSERT supplies them
-- (id, URL, name).
CREATE TABLE `k_bdnm_shopinfo` (
  -- Primary key; auto-incremented.
  `shop_id` int(11) NOT NULL auto_increment,
  -- Shop page URL (up to 300 characters).
  `shop_url` varchar(300) NOT NULL,
  -- Shop display name (up to 100 characters).
  `shop_name` varchar(100) NOT NULL,
  PRIMARY KEY  (`shop_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

Guess you like

Origin http://43.154.161.224:23101/article/api/json?id=326332010&siteId=291194637