【python脚本】-刷CSDN博客流量

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/Kevin_zhai/article/details/52415686

本脚本可以通过打开CSDN博客页面,来增加博客访问量。写此脚本纯粹是为了练手,想要增加访问量,写出高质量的文章才是王道。脚本如下:

#!/usr/bin/python
# -*- coding: utf-8 -*-
# NOTE: Python 2 script (urllib2, print statements below).
# Fix: the original shebang read "#!usr/bin/python" (missing leading "/"),
# which would fail when the script is executed directly.
import urllib2
import re
import time
from bs4 import BeautifulSoup

# Root of the CSDN blog site; article hrefs found in list pages are
# site-relative and get prefixed with this.
baseUrl = 'http://blog.csdn.net'

def getPage(url):
    """Fetch *url* and return its raw HTML body as a string."""
    # Spoof an old-IE User-Agent so the server treats us as a browser.
    browser_headers = {
        'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)',
    }
    request = urllib2.Request(url, headers=browser_headers)
    response = urllib2.urlopen(request)
    return response.read()

def getNumber(url):
    """Return the number of article-list pages of the blog at *url*.

    The value is returned as a string of digits, exactly as scraped.
    """
    soup = BeautifulSoup(getPage(url), 'html.parser', from_encoding='utf-8')
    pager = soup.find(id="papelist")
    # The pager's first text node reads e.g. " 97条  共7页 "
    # ("97 posts, 7 pages in total").
    summary = pager.contents[1].string.strip()
    # Runs of whitespace separate the two phrases; take the second one ("共7页").
    page_phrase = re.split(r'\s+', summary)[1]
    # Drop the surrounding characters ("共" and "页") to keep only the digits.
    return page_phrase[1:-1]

def getArticleLink(account):
    """Collect every article of *account* as a {title: absolute URL} dict."""
    blog_url = baseUrl + '/' + account
    total_pages = int(getNumber(blog_url))
    links = {}
    for page in range(1, total_pages + 1):
        # One paginated list of articles per iteration.
        list_url = blog_url + '/article/list/' + str(page)
        soup = BeautifulSoup(getPage(list_url), 'html.parser',
                             from_encoding='utf-8')
        for h1 in soup.find_all('h1'):
            anchor = h1.contents[1].contents[0]
            title = anchor.string.strip()   # article title
            href = anchor['href'].strip()   # site-relative article link
            links[title] = baseUrl + href
    return links

'''
打开博客文章刷流量
'''
def openArticle(account,number,isAll,urlDic):
    user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'   
    refererData = 'https://www.baidu.com/s?wd=%E3%80%90Python%E5%AD%A6%E4%B9%A0%E7%AC%94%E8%AE%B0%E3%80%91-%E8%87%AA%E5%8A%A8%E5%8F%91%E9%80%81%E9%82%AE%E4%BB%B6%E8%84%9A%E6%9C%AC&rsv_spt=1&rsv_iqid=0xd0c448a5000805ae&issp=1&f=8&rsv_bp=1&rsv_idx=2&ie=utf-8&rqlang=cn&tn=baiduhome_pg&rsv_enter=0&oq=%E3%80%90Python%E5%AD%A6%E4%B9%A0%E7%AC%94%E8%AE%B0%E3%80%91-%E8%87%AA%E5%8A%A8%E5%8F%91%E9%80%81%E9%82%AE%E4%BB%B6%E8%84%9A%E6%9C%AC&rsv_t=b892PEG45vPXxkNVtED7%2BXf%2BFk6gOTgA7wSkEm4698UcnIMg027x%2FbVgkQ%2BaCjgAe9DF&rsv_pq=ad3cf723000a6ce2&rsv_sug=1'      
    data = ''    
    headers = {'User-Agent' : user_agent, 'Referer' : refererData}  #伪装成浏览器访问
    count = 0
    if isAll:
        dic = getArticleLink(account)
    else:
        dic = urlDic
    while count < number:
        for key in dic:
            try:
                url = dic[key]
                print key + '\t' + str(count)
                request = urllib2.Request(url, data, headers) 
                rec = urllib2.urlopen(request)
                time.sleep(10)
            except Exception, e:
                print e
                time.sleep(120)   #发生异常,可能是访问太频繁,多等一会
                continue
        count = count+1			

	
if __name__ == "__main__":
    # Script entry point: crank the view counter for the configured account.
    account = "Kevin_zhai"   # CSDN account whose blog views are inflated
    number = 1    # number of passes over each article
    isAll = True   # True: hit every article of the account; False: only urlDic
    # Fallback {title: url} map, used only when isAll is False.
    urlDic = {"【Spring学习笔记七】-Spring MVC基本配置和实例":"http://blog.csdn.net/kevin_zhai/article/details/52368420","【Spring学习笔记六】-Spring MVC框架":"http://blog.csdn.net/kevin_zhai/article/details/52279160"}
    openArticle(account,number,isAll,urlDic)


猜你喜欢

转载自blog.csdn.net/Kevin_zhai/article/details/52415686