- Analyze the site's data interface to obtain the picture address, then download the picture
# -*- coding: utf-8 -*-
import os
import re
import sys
import ssl
import xlwt
import time
import _thread
import requests
import pymysql
import threading# 自定义线程
class myThread(threading.Thread):
    """Worker thread that runs ``getDataFromDB(counter)`` under ``threadLock``.

    Attributes:
        threadID: Identifier passed in by the creator; stored, not otherwise
            used by this class.
        name: Thread name, printed when the thread starts and exits.
        counter: Value forwarded to ``getDataFromDB`` (used there as the
            page index).
    """

    def __init__(self, threadID, name, counter):
        threading.Thread.__init__(self)
        self.threadID = threadID
        self.name = name
        self.counter = counter

    def run(self):
        """Process this worker's page while holding the module-level lock."""
        print("开始线程:" + self.name)
        # ``with`` guarantees the lock is released even if getDataFromDB
        # raises; a manual acquire()/release() pair would leave the lock held
        # and block every other worker forever.
        with threadLock:
            getDataFromDB(self.counter)
        print("退出线程:" + self.name)
# Unverified SSL context (certificate validation disabled).
# NOTE(review): `context` is not referenced by any code visible in this
# script — confirm whether it is still needed.
context = ssl._create_unverified_context()

# Request headers sent with every search request.
# Adjacent string literals are used instead of backslash continuations so
# that no indentation leaks into (and no space is lost from) the values.
headers = {
    'Connection': 'keep-alive',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Accept': 'text/html,application/xhtml+xml,application/xml;'
              'q=0.9,image/webp,image/apng,*/*;q=0.8',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
}

# File storage path (must end with a separator: file names are appended to it).
# NOTE(review): the directory names were reconstructed from garbled text —
# confirm the real target directory.
filePath = 'F:/Reptilian/Music/Player/Ting/'
# imgPath = filePath + 'img/'

# Open the database connection.
# pymysql keyword arguments are lowercase; the connection is bound to `db`
# because the rest of the script calls db.cursor()/db.commit()/db.close().
# NOTE(review): credentials are hard-coded and the password / schema spelling
# could not be verified — confirm, and prefer loading them from configuration.
db = pymysql.connect(
    host='127.0.0.1',
    port=3306,
    user='root',
    passwd='Lutong',
    db='Test',
    charset='utf8',
)

# Use cursor() to get the operating cursor (shared by all workers).
cursor = db.cursor()

# Number of records fetched per query (page size).
limit = 250

# SQL query template: `{}` receives the limit, the offset is appended.
sql = "select name, code from t_player where status = 'used' limit {} offset "

# Current local time (seconds since the epoch).
t = time.time()

# Search address; `{}` receives the query term.
tingSearchUrl = ('http://tingapi.ting.baidu.com/v1/restserver/ting'
                 '?from=web&version=5.6.5.0'
                 '&method=baidu.ting.search.catalogSug&format=json&query={}')
# Read one page of records from the database
def getDataFromDB(i):
    """Fetch page ``i`` of singers and try to get a picture for each one.

    Builds the query from the module-level ``sql`` template and ``limit``
    (offset is ``limit * i``), runs it on the shared ``cursor`` and calls
    ``GETIMG(name, code)`` for every returned row.

    Args:
        i: Zero-based page index.

    Database errors are reported and end the page. A failure while handling
    one row is reported and the remaining rows are still processed, so a
    single bad record no longer skips the rest of the page.
    """
    query = sql.format(limit) + str(limit * i)
    print(query)
    try:
        cursor.execute(query)
        results = cursor.fetchall()
    except pymysql.Error as exc:
        print('get picture abnormal!', exc)
        return

    for row in results:
        name, code = row[0], row[1]
        try:
            GETIMG(name, code)
        except Exception as exc:  # worker boundary: report and keep going
            print('get picture abnormal!', exc)


# Get the picture address by analysing the web response
def _record_missing_image(name, code):
    """Insert the singer into ``t_player_no_img``; roll back if that fails."""
    try:
        # Parameterized so quotes in a singer name can neither break the
        # statement nor inject SQL.
        cursor.execute(
            "INSERT INTO t_player_no_img VALUES (NULL, %s, %s)", (name, code)
        )
        db.commit()
    except pymysql.Error:
        db.rollback()


def GETIMG(name, code):
    """Look up a singer's picture via ``tingSearchUrl`` and download it.

    The JSON response is treated as a hit when its ``error_code`` equals
    22000 and ``artist[0].artistpic`` is present; the part of that address
    before the first ``@`` is downloaded as ``<code>.jpg``. In every other
    case the singer is recorded in ``t_player_no_img``.

    Args:
        name: Singer name used as the search term.
        code: Singer code; used as the image file name and stored with the
            "no image" record.

    Raises:
        requests.exceptions.RequestException: If the search request fails.
        ValueError: If the response body is not valid JSON.
    """
    from urllib.parse import quote

    # Quote the name so characters such as '&' or '#' stay inside the query.
    url = tingSearchUrl.format(quote(name))
    # A timeout keeps a stalled server from hanging the worker (and the
    # lock it holds) forever.
    rsp = requests.get(url=url, headers=headers, timeout=10)
    jsonData = rsp.json()

    # NOTE(review): the key was reconstructed as lowercase 'error_code' —
    # confirm against a live response.
    if jsonData.get('error_code') != 22000:
        # No record of the singer at all.
        print('singer "' + name + '" information not found!')
        _record_missing_image(name, code)
        return

    try:
        imgUrl = jsonData['artist'][0]['artistpic']
    except (KeyError, IndexError, TypeError):
        imgUrl = None
    if not imgUrl:
        # Singer found, but without a picture.
        print('singer "' + name + '" picture not found!')
        _record_missing_image(name, code)
        return

    # Drop the '@...' suffix; an address without '@' is used unchanged.
    imgUrl = imgUrl.partition('@')[0]
    print('singer "' + name + '" image address:' + imgUrl)
    imgName = code + '.jpg'
    downloadPic(imgUrl, imgName)
# Download a single picture
def downloadPic(imgUrl, imgName):
    """Download ``imgUrl`` and save it as ``filePath + imgName``.

    Args:
        imgUrl: Address of the picture.
        imgName: File name (with extension) to write under ``filePath``.

    A failed request (connection error, timeout, or HTTP error status) is
    reported and nothing is written, so an error page is never saved as an
    image file.
    """
    try:
        r = requests.get(imgUrl, timeout=10)
        r.raise_for_status()
    except requests.exceptions.RequestException:
        print('图片请求错误!')
        return
    # The `with` block closes the file; no explicit close() is needed.
    with open(filePath + imgName, 'wb') as f:
        f.write(r.content)
# Get the image format
def getPicFormat(url):
    """Return the image file extension suggested by ``url``.

    The address is searched case-insensitively for ``.gif``, ``.png`` and
    ``.jpeg`` (in that order); the first match decides the result.

    Args:
        url: Image address; may be empty or None.

    Returns:
        '.gif', '.png' or '.jpeg' when the address contains that extension,
        otherwise '.jpg'.
    """
    if not url:
        return '.jpg'
    lowered = url.lower()
    if '.gif' in lowered:
        return '.gif'
    if '.png' in lowered:
        return '.png'
    # '.jepg' is kept as an accepted misspelling of '.jpeg'.
    if '.jpeg' in lowered or '.jepg' in lowered:
        return '.jpeg'
    return '.jpg'


# Create the target directory
def mkdir(path):
    """Create directory ``path`` (including parents) if it does not exist.

    Surrounding whitespace and trailing backslashes are removed first. An
    empty result is ignored. When the path already exists a message is
    printed and nothing is created.

    Args:
        path: Directory to create.
    """
    path = path.strip().rstrip('\\')
    if not path:
        return
    if os.path.exists(path):
        print('directory already exists, do not need to be repeated to create!')
        return
    # exist_ok covers the case where the directory appears between the
    # check above and this call.
    os.makedirs(path, exist_ok=True)
# Main method
if __name__ == '__main__':
    # Create the output directory
    mkdir(filePath)

    # Lock shared by all workers. Each worker holds it for its whole run(),
    # so the pages are processed one at a time on the shared cursor.
    threadLock = threading.Lock()

    # One worker per page: 20 pages of `limit` records each
    threads = []
    for i in range(20):
        threads.append(myThread(i, "Thread-" + str(i), i))

    try:
        for th in threads:
            th.start()
        for th in threads:
            th.join()
    finally:
        # Close the database connection even if starting/joining fails
        db.close()
- Analyze the page tags, match the required image attribute with a regular expression, then extract the image address and download the image
# -*- coding: utf-8 -*-
import os
import re
import sys
import ssl
import xlwt
import time
import _thread
import requests
import pymysql
import threading# 自定义线程
class myThread(threading.Thread):
    """Worker thread that runs ``getDataFromDB(counter)`` under ``threadLock``.

    Attributes:
        threadID: Identifier passed in by the creator; stored, not otherwise
            used by this class.
        name: Thread name, printed when the thread starts and exits.
        counter: Value forwarded to ``getDataFromDB`` (used there as the
            page index).
    """

    def __init__(self, threadID, name, counter):
        threading.Thread.__init__(self)
        self.threadID = threadID
        self.name = name
        self.counter = counter

    def run(self):
        """Process this worker's page while holding the module-level lock."""
        print("开始线程:" + self.name)
        # ``with`` guarantees the lock is released even if getDataFromDB
        # raises; a manual acquire()/release() pair would leave the lock held
        # and block every other worker forever.
        with threadLock:
            getDataFromDB(self.counter)
        print("退出线程:" + self.name)
# Unverified SSL context (certificate validation disabled).
# NOTE(review): `context` is not referenced by any code visible in this
# script — confirm whether it is still needed.
context = ssl._create_unverified_context()

# Request headers sent with every search request.
# Adjacent string literals are used instead of backslash continuations so
# that no indentation leaks into (and no space is lost from) the values.
headers = {
    'Connection': 'keep-alive',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Accept': 'text/html,application/xhtml+xml,application/xml;'
              'q=0.9,image/webp,image/apng,*/*;q=0.8',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
}

# File storage path (must end with a separator: file names are appended to it).
# NOTE(review): the directory names were reconstructed from garbled text —
# confirm the real target directory.
filePath = 'F:/Reptilian/Music/Player/xiami/'
# imgPath = filePath + 'img/'

# Open the database connection.
# pymysql keyword arguments are lowercase; the connection is bound to `db`
# because the rest of the script calls db.cursor()/db.commit()/db.close().
# NOTE(review): credentials are hard-coded and the password / schema spelling
# could not be verified — confirm, and prefer loading them from configuration.
db = pymysql.connect(
    host='127.0.0.1',
    port=3306,
    user='root',
    passwd='Lutong',
    db='Test',
    charset='utf8',
)

# Use cursor() to get the operating cursor (shared by all workers).
cursor = db.cursor()

# Number of records fetched per query (page size).
limit = 250

# SQL query template: `{}` receives the limit, the offset is appended.
sql = "select name, code from t_player where status = 'used' limit {} offset "

# Current local time (seconds since the epoch); the main section uses it to
# fill the `{}` placeholder of xiamiSearchUrl.
t = time.time()

# Search address; `{}` receives a timestamp and the search key is appended.
xiamiSearchUrl = 'https://emumo.xiami.com/ajax/search-index?_={}&key='
# Read one page of records from the database
def getDataFromDB(i):
    """Fetch page ``i`` of singers and try to get a picture for each one.

    Builds the query from the module-level ``sql`` template and ``limit``
    (offset is ``limit * i``), runs it on the shared ``cursor`` and calls
    ``GETIMG(name, code)`` for every returned row.

    Args:
        i: Zero-based page index.

    Database errors are reported and end the page. A failure while handling
    one row is reported and the remaining rows are still processed, so a
    single bad record no longer skips the rest of the page.
    """
    query = sql.format(limit) + str(limit * i)
    print(query)
    try:
        cursor.execute(query)
        results = cursor.fetchall()
    except pymysql.Error as exc:
        print('get picture abnormal!', exc)
        return

    for row in results:
        name, code = row[0], row[1]
        try:
            GETIMG(name, code)
        except Exception as exc:  # worker boundary: report and keep going
            print('get picture abnormal!', exc)


# Get the picture address by analysing the web page
def _record_missing_image(name, code):
    """Insert the singer into ``t_player_no_img``; roll back if that fails."""
    try:
        # Parameterized so quotes in a singer name can neither break the
        # statement nor inject SQL.
        cursor.execute(
            "INSERT INTO t_player_no_img VALUES (NULL, %s, %s)", (name, code)
        )
        db.commit()
    except pymysql.Error:
        db.rollback()


def GETIMG(name, code):
    """Search the page at ``xiamiSearchUrl`` for a singer's picture.

    The returned HTML is scanned with a regular expression for an
    ``artistlogo`` ``.jpg`` address; the first match is downloaded as
    ``<code>.jpg``. When nothing matches, the singer is recorded in
    ``t_player_no_img``.

    Args:
        name: Singer name, appended to the search URL as the key.
        code: Singer code; used as the image file name and stored with the
            "no image" record.

    Raises:
        requests.exceptions.RequestException: If the search request fails.
    """
    from urllib.parse import quote

    # The key belongs at the END of the URL ("...&key=<name>"); quoting keeps
    # characters such as '&' or '#' inside the key.
    url = xiamiSearchUrl + quote(name)
    # A timeout keeps a stalled server from hanging the worker (and the
    # lock it holds) forever.
    rsp = requests.get(url=url, headers=headers, timeout=10)
    html = rsp.text

    # NOTE(review): pattern reconstructed from garbled text — confirm against
    # the real page markup.
    p = r'src="(//pic.xiami.net/images/artistlogo/+[^"]+\.jpg)@1e_1c_100Q_55w_55h"'
    imgUrlList = re.findall(p, html)
    if not imgUrlList:
        print('singer "' + name + '" image not found!')
        # Record the singer whose picture could not be obtained.
        _record_missing_image(name, code)
        return

    # The matched address is protocol-relative; add the scheme and download.
    imgUrl = 'https:' + imgUrlList[0]
    print('Singer "' + name + '" Image Address:' + imgUrl)
    imgName = code + '.jpg'
    downloadPic(imgUrl, imgName)
# Download a single picture
def downloadPic(imgUrl, imgName):
    """Download ``imgUrl`` and save it as ``filePath + imgName``.

    Args:
        imgUrl: Address of the picture.
        imgName: File name (with extension) to write under ``filePath``.

    A failed request (connection error, timeout, or HTTP error status) is
    reported and nothing is written, so an error page is never saved as an
    image file.
    """
    try:
        r = requests.get(imgUrl, timeout=10)
        r.raise_for_status()
    except requests.exceptions.RequestException:
        print('picture request error!')
        return
    # The `with` block closes the file; no explicit close() is needed.
    with open(filePath + imgName, 'wb') as f:
        f.write(r.content)
# Get the image format
def getPicFormat(url):
    """Return the image file extension suggested by ``url``.

    The address is searched case-insensitively for ``.gif``, ``.png`` and
    ``.jpeg`` (in that order); the first match decides the result.

    Args:
        url: Image address; may be empty or None.

    Returns:
        '.gif', '.png' or '.jpeg' when the address contains that extension,
        otherwise '.jpg'.
    """
    if not url:
        return '.jpg'
    lowered = url.lower()
    if '.gif' in lowered:
        return '.gif'
    if '.png' in lowered:
        return '.png'
    # '.jepg' is kept as an accepted misspelling of '.jpeg'.
    if '.jpeg' in lowered or '.jepg' in lowered:
        return '.jpeg'
    return '.jpg'


# Create the target directory
def mkdir(path):
    """Create directory ``path`` (including parents) if it does not exist.

    Surrounding whitespace and trailing backslashes are removed first. An
    empty result is ignored. When the path already exists a message is
    printed and nothing is created.

    Args:
        path: Directory to create.
    """
    path = path.strip().rstrip('\\')
    if not path:
        return
    if os.path.exists(path):
        print('directory already exists, do not need to be repeated to create!')
        return
    # exist_ok covers the case where the directory appears between the
    # check above and this call.
    os.makedirs(path, exist_ok=True)
# Main method
if __name__ == '__main__':
    # Create the output directory
    mkdir(filePath)

    # Stamp the search address with the current time (fills its `{}` slot)
    xiamiSearchUrl = xiamiSearchUrl.format(int(t))

    # Lock shared by all workers. Each worker holds it for its whole run(),
    # so the pages are processed one at a time on the shared cursor.
    threadLock = threading.Lock()

    # One worker per page: 20 pages of `limit` records each
    threads = []
    for i in range(20):
        threads.append(myThread(i, "Thread-" + str(i), i))

    try:
        for th in threads:
            th.start()
        for th in threads:
            th.join()
    finally:
        # Close the database connection even if starting/joining fails
        db.close()