爬虫-通用代码框架

1.百度搜索关键词提交

百度的搜索路径格式是:http://www.baidu.com/s?wd=keyword

import requests
# Submit a search keyword to Baidu and report whether the request succeeded.
keyword = "Python"
try:
    # requests URL-encodes this dict and appends it to the URL as the query
    # string, i.e. http://www.baidu.com/s?wd=<keyword>.
    kv = {'wd': keyword}
    url = "http://www.baidu.com/s"
    # A timeout keeps the script from blocking forever on a stalled server.
    r = requests.get(url, params=kv, timeout=10)
    # print(r.request.url)   # uncomment to inspect the final request URL
    r.raise_for_status()     # raise on 4xx/5xx instead of silently continuing
    # print(len(r.text))     # uncomment to inspect the size of the response
except requests.RequestException:
    # Covers connection errors, timeouts and the HTTPError raised above.
    print("爬取失败")

2.网络图片爬取

import requests
import os

# Download a single image and store it under `root`, skipping the download
# when a file with the same name is already present.
root = "D://pics//"
url = "http://img0.dili360.com/ga/M01/43/32/wKgBy1diUTyAJgSmADRkvfSqZo0372.tub.jpg"
path = root + url.split('/')[-1]    # reuse the remote file name as the local file name
try:
    # Create the target directory if needed; exist_ok avoids the
    # check-then-create race of os.path.exists() followed by os.mkdir().
    os.makedirs(root, exist_ok=True)
    if not os.path.exists(path):
        r = requests.get(url, timeout=10)
        # Without this check an HTTP error page would be written to disk
        # as if it were the image.
        r.raise_for_status()
        # The context manager closes the file; no explicit close() is needed.
        with open(path, 'wb') as f:
            f.write(r.content)
        print("保存成功")
    else:
        print("文件已存在")
except (requests.RequestException, OSError):
    # Network failures, bad HTTP status codes, or file-system errors.
    print("爬取失败")

3.提交IP地址到IP138查询

import requests
import os

# Query ip138.com for information about an IP address and print the tail of
# the returned page.
ip = "202.204.80.112"
raw_url = "http://www.ip138.com/ips138.asp?ip="
url = raw_url + ip
try:
    # A timeout keeps the script from blocking forever on a stalled server.
    r = requests.get(url, timeout=10)
    r.raise_for_status()     # raise on 4xx/5xx instead of printing an error page
    # Use the encoding detected from the body so the text decodes correctly
    # even when the response headers do not declare a charset.
    r.encoding = r.apparent_encoding
    print(r.text[-500:])     # only the last 500 characters of the page
except requests.RequestException:
    # Covers connection errors, timeouts and the HTTPError raised above.
    print("爬取失败")

猜你喜欢

转载自www.cnblogs.com/KrianJ/p/10586571.html