Python爬虫学习笔记(八)

#抓取站长素材网页简历模板 .rar压缩包  也是二进制



############################################################################
#代理ip
import requests

if __name__ == "__main__":
    # Query Baidu for "ip" through a forward proxy and save the page,
    # so the effective client IP can be inspected in the saved HTML.
    url = 'https://www.baidu.com/s?wd=ip'

    # Spoof a desktop Chrome user-agent so the server treats this as a
    # normal browser request.
    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3947.100 Safari/537.36"}

    # BUG FIX: the target URL uses https, but only an "http" proxy was
    # configured, so requests bypassed the proxy entirely. Register the
    # proxy for both schemes so it is actually applied.
    proxies = {
        "http": "http://60.182.23.171:9000",
        "https": "http://60.182.23.171:9000",
    }
    response = requests.get(url=url, headers=headers, proxies=proxies)
    code = response.status_code
    print('状态码 :' + str(code))

    # Persist the returned page for offline inspection.
    page_text = response.text
    with open('./ip.html', 'w', encoding='utf-8') as fp:
        fp.write(page_text)

#############################################################################
import requests
# Shared request headers: spoof a desktop Chrome user-agent so servers
# respond as they would to a regular browser.
headers = {
    
    
       "user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3947.100 Safari/537.36"}
# Placeholder target list — fill in real URLs before running, otherwise
# requests.get('') will raise a MissingSchema error.
urls=['','','']
# Define two helpers: one to fetch a URL, one to report on the response.
def get_Content(url):
    """Fetch *url* with the shared headers.

    Returns the requests Response on HTTP 200; otherwise falls through
    and returns None (callers must handle that case).
    """
    print('正在请求 :' + url)
    resp = requests.get(url=url, headers=headers)
    return resp if resp.status_code == 200 else None
def parse_Content(content):
    """Print the body length of a fetched response.

    Fixes two crashes in the original:
    - ``'长度为:' + len(...)`` concatenated str with int (TypeError);
    - ``len(content)`` was called on the Response object itself, which
      does not support ``len()`` — measure ``content.text`` instead.

    Also tolerates ``content is None`` (what get_Content returns on a
    non-200 status) instead of raising AttributeError.
    """
    if content is None:
        # Fetch failed upstream; report a zero-length body.
        print('长度为:0')
        return
    print('长度为:' + str(len(content.text)))


if __name__=="__main__":
    for url in urls:
        content = get_Content(url)# get()阻塞函数
        parse_Content(content)

猜你喜欢

转载自 https://blog.csdn.net/Kaaaakaki/article/details/109104134